sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)
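
# Illustrative sketch (not part of the original module): these build_* helpers turn the raw
# argument list of a SQL function call into a richer AST node. For example, assuming the
# default dialect, a two-argument LOG call is routed through build_logarithm:
#
#     import sqlglot
#     from sqlglot import exp
#
#     log_node = sqlglot.parse_one("SELECT LOG(2, 8)").find(exp.Log)
#     # log_node.this is the base and log_node.expression the operand when LOG_BASE_FIRST is set
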
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
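
    # Illustrative usage (not part of the original module): the Parser is normally driven
    # through a Dialect or the top-level sqlglot API rather than instantiated directly, and the
    # constructor options above can be forwarded from there:
    #
    #     import sqlglot
    #     from sqlglot.errors import ErrorLevel
    #
    #     sqlglot.parse_one("SELECT a FROM t")                               # default dialect
    #     sqlglot.parse("SELECT 1; SELECT 2", error_level=ErrorLevel.WARN)   # log instead of raise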

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
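
    # Illustrative sketch (not part of the original module): FUNCTIONS maps function names to
    # builders, while NO_PAREN_FUNCTIONS covers keywords that parse as functions without
    # parentheses:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     sqlglot.parse_one("SELECT IFNULL(a, b)").find(exp.Coalesce)     # via build_coalesce
    #     sqlglot.parse_one("SELECT CURRENT_DATE").find(exp.CurrentDate)  # no parentheses needed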

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.LOCK,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()
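
    # Illustrative sketch (not part of the original module): the CONJUNCTION / EQUALITY /
    # TERM / FACTOR tables drive operator precedence, so FACTOR operators bind tighter than
    # TERM operators:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     tree = sqlglot.parse_one("SELECT a + b * c")
    #     # b * c parses as an exp.Mul nested inside an exp.Add node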

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }
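
    # Illustrative sketch (not part of the original module): COLUMN_OPERATORS handles postfix
    # operators attached to a column, e.g. the `::` cast and the JSON extraction arrows. For
    # instance, in the default dialect:
    #
    #     import sqlglot
    #     from sqlglot import exp
    #
    #     sqlglot.parse_one("SELECT x::INT").find(exp.Cast)   # TokenType.DCOLON -> build_cast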

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }
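
    # Illustrative sketch (not part of the original module): STATEMENT_PARSERS dispatches on the
    # first token of each statement, so a multi-statement script yields one tree per statement:
    #
    #     import sqlglot
    #
    #     trees = sqlglot.parse("SELECT 1; UPDATE t SET x = 1")
    #     # trees is [exp.Select, exp.Update] instances, in order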

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
"REMOTE": lambda self: self._parse_remote_with_connection(), 1046 "RETURNS": lambda self: self._parse_returns(), 1047 "STRICT": lambda self: self.expression(exp.StrictProperty), 1048 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1049 "ROW": lambda self: self._parse_row(), 1050 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1051 "SAMPLE": lambda self: self.expression( 1052 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1053 ), 1054 "SECURE": lambda self: self.expression(exp.SecureProperty), 1055 "SECURITY": lambda self: self._parse_security(), 1056 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1057 "SETTINGS": lambda self: self._parse_settings_property(), 1058 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1059 "SORTKEY": lambda self: self._parse_sortkey(), 1060 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1061 "STABLE": lambda self: self.expression( 1062 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1063 ), 1064 "STORED": lambda self: self._parse_stored(), 1065 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1066 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1067 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1068 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1069 "TO": lambda self: self._parse_to_table(), 1070 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1071 "TRANSFORM": lambda self: self.expression( 1072 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1073 ), 1074 "TTL": lambda self: self._parse_ttl(), 1075 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1076 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1077 "VOLATILE": lambda self: self._parse_volatile_property(), 1078 "WITH": lambda self: self._parse_with_property(), 1079 } 1080 1081 CONSTRAINT_PARSERS = { 1082 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1083 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1084 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1085 "CHARACTER SET": lambda self: self.expression( 1086 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1087 ), 1088 "CHECK": lambda self: self.expression( 1089 exp.CheckColumnConstraint, 1090 this=self._parse_wrapped(self._parse_assignment), 1091 enforced=self._match_text_seq("ENFORCED"), 1092 ), 1093 "COLLATE": lambda self: self.expression( 1094 exp.CollateColumnConstraint, 1095 this=self._parse_identifier() or self._parse_column(), 1096 ), 1097 "COMMENT": lambda self: self.expression( 1098 exp.CommentColumnConstraint, this=self._parse_string() 1099 ), 1100 "COMPRESS": lambda self: self._parse_compress(), 1101 "CLUSTERED": lambda self: self.expression( 1102 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1103 ), 1104 "NONCLUSTERED": lambda self: self.expression( 1105 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1106 ), 1107 "DEFAULT": lambda self: self.expression( 1108 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1109 ), 1110 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1111 "EPHEMERAL": lambda self: self.expression( 1112 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1113 ), 1114 
"EXCLUDE": lambda self: self.expression( 1115 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1116 ), 1117 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1118 "FORMAT": lambda self: self.expression( 1119 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1120 ), 1121 "GENERATED": lambda self: self._parse_generated_as_identity(), 1122 "IDENTITY": lambda self: self._parse_auto_increment(), 1123 "INLINE": lambda self: self._parse_inline(), 1124 "LIKE": lambda self: self._parse_create_like(), 1125 "NOT": lambda self: self._parse_not_constraint(), 1126 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1127 "ON": lambda self: ( 1128 self._match(TokenType.UPDATE) 1129 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1130 ) 1131 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1132 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1133 "PERIOD": lambda self: self._parse_period_for_system_time(), 1134 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1135 "REFERENCES": lambda self: self._parse_references(match=False), 1136 "TITLE": lambda self: self.expression( 1137 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1138 ), 1139 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1140 "UNIQUE": lambda self: self._parse_unique(), 1141 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1142 "WATERMARK": lambda self: self.expression( 1143 exp.WatermarkColumnConstraint, 1144 this=self._match(TokenType.FOR) and self._parse_column(), 1145 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1146 ), 1147 "WITH": lambda self: self.expression( 1148 exp.Properties, expressions=self._parse_wrapped_properties() 1149 ), 1150 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1151 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1152 } 1153 1154 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1155 if not self._match(TokenType.L_PAREN, advance=False): 1156 # Partitioning by bucket or truncate follows the syntax: 1157 # PARTITION BY (BUCKET(..) 

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1248 "GAP_FILL": lambda self: self._parse_gap_fill(), 1249 "JSON_OBJECT": lambda self: self._parse_json_object(), 1250 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1251 "JSON_TABLE": lambda self: self._parse_json_table(), 1252 "MATCH": lambda self: self._parse_match_against(), 1253 "NORMALIZE": lambda self: self._parse_normalize(), 1254 "OPENJSON": lambda self: self._parse_open_json(), 1255 "OVERLAY": lambda self: self._parse_overlay(), 1256 "POSITION": lambda self: self._parse_position(), 1257 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1258 "STRING_AGG": lambda self: self._parse_string_agg(), 1259 "SUBSTRING": lambda self: self._parse_substring(), 1260 "TRIM": lambda self: self._parse_trim(), 1261 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1262 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1263 "XMLELEMENT": lambda self: self.expression( 1264 exp.XMLElement, 1265 this=self._match_text_seq("NAME") and self._parse_id_var(), 1266 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1267 ), 1268 "XMLTABLE": lambda self: self._parse_xml_table(), 1269 } 1270 1271 QUERY_MODIFIER_PARSERS = { 1272 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1273 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1274 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1275 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1276 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1277 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1278 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1279 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1280 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1281 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1282 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1283 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1284 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1285 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1286 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1287 TokenType.CLUSTER_BY: lambda self: ( 1288 "cluster", 1289 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1290 ), 1291 TokenType.DISTRIBUTE_BY: lambda self: ( 1292 "distribute", 1293 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1294 ), 1295 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1296 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1297 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1298 } 1299 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1300 1301 SET_PARSERS = { 1302 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1303 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1304 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1305 "TRANSACTION": lambda self: self._parse_set_transaction(), 1306 } 1307 1308 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1309 1310 TYPE_LITERAL_PARSERS = { 1311 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1312 } 1313 1314 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria.
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
1623 sql: The original SQL string, used to produce helpful debug messages. 1624 1625 Returns: 1626 The list of the produced syntax trees. 1627 """ 1628 return self._parse( 1629 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1630 ) 1631 1632 def parse_into( 1633 self, 1634 expression_types: exp.IntoType, 1635 raw_tokens: t.List[Token], 1636 sql: t.Optional[str] = None, 1637 ) -> t.List[t.Optional[exp.Expression]]: 1638 """ 1639 Parses a list of tokens into a given Expression type. If a collection of Expression 1640 types is given instead, this method will try to parse the token list into each one 1641 of them, stopping at the first for which the parsing succeeds. 1642 1643 Args: 1644 expression_types: The expression type(s) to try and parse the token list into. 1645 raw_tokens: The list of tokens. 1646 sql: The original SQL string, used to produce helpful debug messages. 1647 1648 Returns: 1649 The target Expression. 1650 """ 1651 errors = [] 1652 for expression_type in ensure_list(expression_types): 1653 parser = self.EXPRESSION_PARSERS.get(expression_type) 1654 if not parser: 1655 raise TypeError(f"No parser registered for {expression_type}") 1656 1657 try: 1658 return self._parse(parser, raw_tokens, sql) 1659 except ParseError as e: 1660 e.errors[0]["into_expression"] = expression_type 1661 errors.append(e) 1662 1663 raise ParseError( 1664 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1665 errors=merge_errors(errors), 1666 ) from errors[-1] 1667 1668 def _parse( 1669 self, 1670 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1671 raw_tokens: t.List[Token], 1672 sql: t.Optional[str] = None, 1673 ) -> t.List[t.Optional[exp.Expression]]: 1674 self.reset() 1675 self.sql = sql or "" 1676 1677 total = len(raw_tokens) 1678 chunks: t.List[t.List[Token]] = [[]] 1679 1680 for i, token in enumerate(raw_tokens): 1681 if token.token_type == TokenType.SEMICOLON: 1682 if token.comments: 1683 chunks.append([token]) 1684 1685 if i < total - 1: 1686 chunks.append([]) 1687 else: 1688 chunks[-1].append(token) 1689 1690 expressions = [] 1691 1692 for tokens in chunks: 1693 self._index = -1 1694 self._tokens = tokens 1695 self._advance() 1696 1697 expressions.append(parse_method(self)) 1698 1699 if self._index < len(self._tokens): 1700 self.raise_error("Invalid expression / Unexpected token") 1701 1702 self.check_errors() 1703 1704 return expressions 1705 1706 def check_errors(self) -> None: 1707 """Logs or raises any found errors, depending on the chosen error level setting.""" 1708 if self.error_level == ErrorLevel.WARN: 1709 for error in self.errors: 1710 logger.error(str(error)) 1711 elif self.error_level == ErrorLevel.RAISE and self.errors: 1712 raise ParseError( 1713 concat_messages(self.errors, self.max_errors), 1714 errors=merge_errors(self.errors), 1715 ) 1716 1717 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1718 """ 1719 Appends an error in the list of recorded errors or raises it, depending on the chosen 1720 error level setting. 1721 """ 1722 token = token or self._curr or self._prev or Token.string("") 1723 start = token.start 1724 end = token.end + 1 1725 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1726 highlight = self.sql[start:end] 1727 end_context = self.sql[end : end + self.error_message_context] 1728 1729 error = ParseError.new( 1730 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1731 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1732 description=message, 1733 line=token.line, 1734 col=token.col, 1735 start_context=start_context, 1736 highlight=highlight, 1737 end_context=end_context, 1738 ) 1739 1740 if self.error_level == ErrorLevel.IMMEDIATE: 1741 raise error 1742 1743 self.errors.append(error) 1744 1745 def expression( 1746 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1747 ) -> E: 1748 """ 1749 Creates a new, validated Expression. 1750 1751 Args: 1752 exp_class: The expression class to instantiate. 1753 comments: An optional list of comments to attach to the expression. 1754 kwargs: The arguments to set for the expression along with their respective values. 1755 1756 Returns: 1757 The target expression. 1758 """ 1759 instance = exp_class(**kwargs) 1760 instance.add_comments(comments) if comments else self._add_comments(instance) 1761 return self.validate_expression(instance) 1762 1763 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1764 if expression and self._prev_comments: 1765 expression.add_comments(self._prev_comments) 1766 self._prev_comments = None 1767 1768 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1769 """ 1770 Validates an Expression, making sure that all its mandatory arguments are set. 1771 1772 Args: 1773 expression: The expression to validate. 1774 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1775 1776 Returns: 1777 The validated expression. 1778 """ 1779 if self.error_level != ErrorLevel.IGNORE: 1780 for error_message in expression.error_messages(args): 1781 self.raise_error(error_message) 1782 1783 return expression 1784 1785 def _find_sql(self, start: Token, end: Token) -> str: 1786 return self.sql[start.start : end.end + 1] 1787 1788 def _is_connected(self) -> bool: 1789 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1790 1791 def _advance(self, times: int = 1) -> None: 1792 self._index += times 1793 self._curr = seq_get(self._tokens, self._index) 1794 self._next = seq_get(self._tokens, self._index + 1) 1795 1796 if self._index > 0: 1797 self._prev = self._tokens[self._index - 1] 1798 self._prev_comments = self._prev.comments 1799 else: 1800 self._prev = None 1801 self._prev_comments = None 1802 1803 def _retreat(self, index: int) -> None: 1804 if index != self._index: 1805 self._advance(index - self._index) 1806 1807 def _warn_unsupported(self) -> None: 1808 if len(self._tokens) <= 1: 1809 return 1810 1811 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1812 # interested in emitting a warning for the one being currently processed. 1813 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1814 1815 logger.warning( 1816 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1817 ) 1818 1819 def _parse_command(self) -> exp.Command: 1820 self._warn_unsupported() 1821 return self.expression( 1822 exp.Command, 1823 comments=self._prev_comments, 1824 this=self._prev.text.upper(), 1825 expression=self._parse_string(), 1826 ) 1827 1828 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1829 """ 1830 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1831 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1832 solve this by setting & resetting the parser state accordingly. 1833 """ 1834 index = self._index 1835 error_level = self.error_level 1836 1837 self.error_level = ErrorLevel.IMMEDIATE 1838 try: 1839 this = parse_method() 1840 except ParseError: 1841 this = None 1842 finally: 1843 if not this or retreat: 1844 self._retreat(index) 1845 self.error_level = error_level 1846 1847 return this 1848 1849 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1850 start = self._prev 1851 exists = self._parse_exists() if allow_exists else None 1852 1853 self._match(TokenType.ON) 1854 1855 materialized = self._match_text_seq("MATERIALIZED") 1856 kind = self._match_set(self.CREATABLES) and self._prev 1857 if not kind: 1858 return self._parse_as_command(start) 1859 1860 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1861 this = self._parse_user_defined_function(kind=kind.token_type) 1862 elif kind.token_type == TokenType.TABLE: 1863 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1864 elif kind.token_type == TokenType.COLUMN: 1865 this = self._parse_column() 1866 else: 1867 this = self._parse_id_var() 1868 1869 self._match(TokenType.IS) 1870 1871 return self.expression( 1872 exp.Comment, 1873 this=this, 1874 kind=kind.text, 1875 expression=self._parse_string(), 1876 exists=exists, 1877 materialized=materialized, 1878 ) 1879 1880 def _parse_to_table( 1881 self, 1882 ) -> exp.ToTableProperty: 1883 table = self._parse_table_parts(schema=True) 1884 return self.expression(exp.ToTableProperty, this=table) 1885 1886 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1887 def _parse_ttl(self) -> exp.Expression: 1888 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1889 this = self._parse_bitwise() 1890 1891 if self._match_text_seq("DELETE"): 1892 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1893 if self._match_text_seq("RECOMPRESS"): 1894 return self.expression( 1895 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1896 ) 1897 if self._match_text_seq("TO", "DISK"): 1898 return self.expression( 1899 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1900 ) 1901 if self._match_text_seq("TO", "VOLUME"): 1902 return self.expression( 1903 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1904 ) 1905 1906 return this 1907 1908 expressions = self._parse_csv(_parse_ttl_action) 1909 where = self._parse_where() 1910 group = self._parse_group() 1911 1912 aggregates = None 1913 if group and self._match(TokenType.SET): 1914 aggregates = self._parse_csv(self._parse_set_item) 1915 1916 return self.expression( 1917 exp.MergeTreeTTL, 1918 expressions=expressions, 1919 where=where, 1920 group=group, 1921 aggregates=aggregates, 1922 ) 1923 1924 def _parse_statement(self) -> t.Optional[exp.Expression]: 1925 if self._curr is None: 1926 return None 1927 1928 if self._match_set(self.STATEMENT_PARSERS): 1929 comments = self._prev_comments 1930 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1931 stmt.add_comments(comments, prepend=True) 1932 return stmt 1933 1934 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1935 return self._parse_command() 1936 1937 expression = self._parse_expression() 1938 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1939 return
self._parse_query_modifiers(expression) 1940 1941 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1942 start = self._prev 1943 temporary = self._match(TokenType.TEMPORARY) 1944 materialized = self._match_text_seq("MATERIALIZED") 1945 1946 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1947 if not kind: 1948 return self._parse_as_command(start) 1949 1950 concurrently = self._match_text_seq("CONCURRENTLY") 1951 if_exists = exists or self._parse_exists() 1952 1953 if kind == "COLUMN": 1954 this = self._parse_column() 1955 else: 1956 this = self._parse_table_parts( 1957 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1958 ) 1959 1960 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1961 1962 if self._match(TokenType.L_PAREN, advance=False): 1963 expressions = self._parse_wrapped_csv(self._parse_types) 1964 else: 1965 expressions = None 1966 1967 return self.expression( 1968 exp.Drop, 1969 exists=if_exists, 1970 this=this, 1971 expressions=expressions, 1972 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1973 temporary=temporary, 1974 materialized=materialized, 1975 cascade=self._match_text_seq("CASCADE"), 1976 constraints=self._match_text_seq("CONSTRAINTS"), 1977 purge=self._match_text_seq("PURGE"), 1978 cluster=cluster, 1979 concurrently=concurrently, 1980 ) 1981 1982 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1983 return ( 1984 self._match_text_seq("IF") 1985 and (not not_ or self._match(TokenType.NOT)) 1986 and self._match(TokenType.EXISTS) 1987 ) 1988 1989 def _parse_create(self) -> exp.Create | exp.Command: 1990 # Note: this can't be None because we've matched a statement parser 1991 start = self._prev 1992 1993 replace = ( 1994 start.token_type == TokenType.REPLACE 1995 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1996 or self._match_pair(TokenType.OR, TokenType.ALTER) 1997 ) 1998 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1999 2000 unique = self._match(TokenType.UNIQUE) 2001 2002 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2003 clustered = True 2004 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2005 "COLUMNSTORE" 2006 ): 2007 clustered = False 2008 else: 2009 clustered = None 2010 2011 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2012 self._advance() 2013 2014 properties = None 2015 create_token = self._match_set(self.CREATABLES) and self._prev 2016 2017 if not create_token: 2018 # exp.Properties.Location.POST_CREATE 2019 properties = self._parse_properties() 2020 create_token = self._match_set(self.CREATABLES) and self._prev 2021 2022 if not properties or not create_token: 2023 return self._parse_as_command(start) 2024 2025 concurrently = self._match_text_seq("CONCURRENTLY") 2026 exists = self._parse_exists(not_=True) 2027 this = None 2028 expression: t.Optional[exp.Expression] = None 2029 indexes = None 2030 no_schema_binding = None 2031 begin = None 2032 end = None 2033 clone = None 2034 2035 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2036 nonlocal properties 2037 if properties and temp_props: 2038 properties.expressions.extend(temp_props.expressions) 2039 elif temp_props: 2040 properties = temp_props 2041 2042 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2043 this = self._parse_user_defined_function(kind=create_token.token_type) 2044 2045 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2046 extend_props(self._parse_properties()) 2047 2048 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2049 extend_props(self._parse_properties()) 2050 2051 if not expression: 2052 if self._match(TokenType.COMMAND): 2053 expression = self._parse_as_command(self._prev) 2054 else: 2055 begin = self._match(TokenType.BEGIN) 2056 return_ = self._match_text_seq("RETURN") 2057 2058 if self._match(TokenType.STRING, advance=False): 2059 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2060 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2061 expression = self._parse_string() 2062 extend_props(self._parse_properties()) 2063 else: 2064 expression = self._parse_user_defined_function_expression() 2065 2066 end = self._match_text_seq("END") 2067 2068 if return_: 2069 expression = self.expression(exp.Return, this=expression) 2070 elif create_token.token_type == TokenType.INDEX: 2071 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2072 if not self._match(TokenType.ON): 2073 index = self._parse_id_var() 2074 anonymous = False 2075 else: 2076 index = None 2077 anonymous = True 2078 2079 this = self._parse_index(index=index, anonymous=anonymous) 2080 elif create_token.token_type in self.DB_CREATABLES: 2081 table_parts = self._parse_table_parts( 2082 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2083 ) 2084 2085 # exp.Properties.Location.POST_NAME 2086 self._match(TokenType.COMMA) 2087 extend_props(self._parse_properties(before=True)) 2088 2089 this = self._parse_schema(this=table_parts) 2090 2091 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2092 extend_props(self._parse_properties()) 2093 2094 has_alias = self._match(TokenType.ALIAS) 2095 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2096 # exp.Properties.Location.POST_ALIAS 2097 extend_props(self._parse_properties()) 2098 2099 if create_token.token_type == TokenType.SEQUENCE: 2100 expression = self._parse_types() 2101 props = self._parse_properties() 2102 if props: 2103 sequence_props = exp.SequenceProperties() 2104 options = [] 2105 for prop in props: 2106 if isinstance(prop, exp.SequenceProperties): 2107 for arg, value in prop.args.items(): 2108 if arg == "options": 2109 options.extend(value) 2110 else: 2111 sequence_props.set(arg, value) 2112 prop.pop() 2113 2114 if options: 2115 sequence_props.set("options", options) 2116 2117 props.append("expressions", sequence_props) 2118 extend_props(props) 2119 else: 2120 expression = self._parse_ddl_select() 2121 2122 # Some dialects also support using a table as an alias instead of a SELECT. 2123 # Here we fallback to this as an alternative. 
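# Illustrative sketch (assumed behavior, not part of the original source): a statement such as
# CREATE TABLE t2 AS t1 reaches this point with `expression` still None, because nothing after AS
# matched a SELECT/VALUES token; the fallback below then re-parses the trailing name with
# _parse_table_parts, e.g.
#
#     import sqlglot
#     sqlglot.parse_one("CREATE TABLE t2 AS t1").expression  # expected: an exp.Table, not a Select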
2124 if not expression and has_alias: 2125 expression = self._try_parse(self._parse_table_parts) 2126 2127 if create_token.token_type == TokenType.TABLE: 2128 # exp.Properties.Location.POST_EXPRESSION 2129 extend_props(self._parse_properties()) 2130 2131 indexes = [] 2132 while True: 2133 index = self._parse_index() 2134 2135 # exp.Properties.Location.POST_INDEX 2136 extend_props(self._parse_properties()) 2137 if not index: 2138 break 2139 else: 2140 self._match(TokenType.COMMA) 2141 indexes.append(index) 2142 elif create_token.token_type == TokenType.VIEW: 2143 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2144 no_schema_binding = True 2145 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2146 extend_props(self._parse_properties()) 2147 2148 shallow = self._match_text_seq("SHALLOW") 2149 2150 if self._match_texts(self.CLONE_KEYWORDS): 2151 copy = self._prev.text.lower() == "copy" 2152 clone = self.expression( 2153 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2154 ) 2155 2156 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2157 return self._parse_as_command(start) 2158 2159 create_kind_text = create_token.text.upper() 2160 return self.expression( 2161 exp.Create, 2162 this=this, 2163 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2164 replace=replace, 2165 refresh=refresh, 2166 unique=unique, 2167 expression=expression, 2168 exists=exists, 2169 properties=properties, 2170 indexes=indexes, 2171 no_schema_binding=no_schema_binding, 2172 begin=begin, 2173 end=end, 2174 clone=clone, 2175 concurrently=concurrently, 2176 clustered=clustered, 2177 ) 2178 2179 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2180 seq = exp.SequenceProperties() 2181 2182 options = [] 2183 index = self._index 2184 2185 while self._curr: 2186 self._match(TokenType.COMMA) 2187 if self._match_text_seq("INCREMENT"): 2188 self._match_text_seq("BY") 2189 self._match_text_seq("=") 2190 seq.set("increment", self._parse_term()) 2191 elif self._match_text_seq("MINVALUE"): 2192 seq.set("minvalue", self._parse_term()) 2193 elif self._match_text_seq("MAXVALUE"): 2194 seq.set("maxvalue", self._parse_term()) 2195 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2196 self._match_text_seq("=") 2197 seq.set("start", self._parse_term()) 2198 elif self._match_text_seq("CACHE"): 2199 # T-SQL allows empty CACHE which is initialized dynamically 2200 seq.set("cache", self._parse_number() or True) 2201 elif self._match_text_seq("OWNED", "BY"): 2202 # "OWNED BY NONE" is the default 2203 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2204 else: 2205 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2206 if opt: 2207 options.append(opt) 2208 else: 2209 break 2210 2211 seq.set("options", options if options else None) 2212 return None if self._index == index else seq 2213 2214 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2215 # only used for teradata currently 2216 self._match(TokenType.COMMA) 2217 2218 kwargs = { 2219 "no": self._match_text_seq("NO"), 2220 "dual": self._match_text_seq("DUAL"), 2221 "before": self._match_text_seq("BEFORE"), 2222 "default": self._match_text_seq("DEFAULT"), 2223 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2224 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2225 "after": self._match_text_seq("AFTER"), 2226 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2227 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2228 } 2229 2230 if self._match_texts(self.PROPERTY_PARSERS): 2231 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2232 try: 2233 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2234 except TypeError: 2235 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2236 2237 return None 2238 2239 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2240 return self._parse_wrapped_csv(self._parse_property) 2241 2242 def _parse_property(self) -> t.Optional[exp.Expression]: 2243 if self._match_texts(self.PROPERTY_PARSERS): 2244 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2245 2246 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2247 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2248 2249 if self._match_text_seq("COMPOUND", "SORTKEY"): 2250 return self._parse_sortkey(compound=True) 2251 2252 if self._match_text_seq("SQL", "SECURITY"): 2253 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2254 2255 index = self._index 2256 2257 seq_props = self._parse_sequence_properties() 2258 if seq_props: 2259 return seq_props 2260 2261 self._retreat(index) 2262 key = self._parse_column() 2263 2264 if not self._match(TokenType.EQ): 2265 self._retreat(index) 2266 return None 2267 2268 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2269 if isinstance(key, exp.Column): 2270 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2271 2272 value = self._parse_bitwise() or self._parse_var(any_token=True) 2273 2274 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2275 if isinstance(value, exp.Column): 2276 value = exp.var(value.name) 2277 2278 return self.expression(exp.Property, this=key, value=value) 2279 2280 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2281 if self._match_text_seq("BY"): 2282 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2283 2284 self._match(TokenType.ALIAS) 2285 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2286 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2287 2288 return self.expression( 2289 exp.FileFormatProperty, 2290 this=( 2291 self.expression( 2292 exp.InputOutputFormat, 2293 input_format=input_format, 2294 output_format=output_format, 2295 ) 2296 if input_format or output_format 2297 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2298 ), 2299 hive_format=True, 2300 ) 2301 2302 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2303 field = self._parse_field() 2304 if isinstance(field, exp.Identifier) and not field.quoted: 2305 field = exp.var(field) 2306 2307 return field 2308 2309 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2310 self._match(TokenType.EQ) 2311 self._match(TokenType.ALIAS) 2312 2313 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2314 2315 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2316 properties = [] 2317 while True: 2318 if before: 2319 prop = self._parse_property_before() 2320 else: 2321 prop = self._parse_property() 2322 if not prop: 2323 break 2324 for p in ensure_list(prop): 2325 properties.append(p) 
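# Illustrative example (assumed dialect behavior, not part of the original source): for a trailing
# clause such as ENGINE=InnoDB COMMENT='demo', the loop above collects one property node per option,
# and they are wrapped into a single exp.Properties container below.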
2326 2327 if properties: 2328 return self.expression(exp.Properties, expressions=properties) 2329 2330 return None 2331 2332 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2333 return self.expression( 2334 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2335 ) 2336 2337 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2338 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2339 security_specifier = self._prev.text.upper() 2340 return self.expression(exp.SecurityProperty, this=security_specifier) 2341 return None 2342 2343 def _parse_settings_property(self) -> exp.SettingsProperty: 2344 return self.expression( 2345 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2346 ) 2347 2348 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2349 if self._index >= 2: 2350 pre_volatile_token = self._tokens[self._index - 2] 2351 else: 2352 pre_volatile_token = None 2353 2354 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2355 return exp.VolatileProperty() 2356 2357 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2358 2359 def _parse_retention_period(self) -> exp.Var: 2360 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2361 number = self._parse_number() 2362 number_str = f"{number} " if number else "" 2363 unit = self._parse_var(any_token=True) 2364 return exp.var(f"{number_str}{unit}") 2365 2366 def _parse_system_versioning_property( 2367 self, with_: bool = False 2368 ) -> exp.WithSystemVersioningProperty: 2369 self._match(TokenType.EQ) 2370 prop = self.expression( 2371 exp.WithSystemVersioningProperty, 2372 **{ # type: ignore 2373 "on": True, 2374 "with": with_, 2375 }, 2376 ) 2377 2378 if self._match_text_seq("OFF"): 2379 prop.set("on", False) 2380 return prop 2381 2382 self._match(TokenType.ON) 2383 if self._match(TokenType.L_PAREN): 2384 while self._curr and not self._match(TokenType.R_PAREN): 2385 if self._match_text_seq("HISTORY_TABLE", "="): 2386 prop.set("this", self._parse_table_parts()) 2387 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2388 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2389 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2390 prop.set("retention_period", self._parse_retention_period()) 2391 2392 self._match(TokenType.COMMA) 2393 2394 return prop 2395 2396 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2397 self._match(TokenType.EQ) 2398 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2399 prop = self.expression(exp.DataDeletionProperty, on=on) 2400 2401 if self._match(TokenType.L_PAREN): 2402 while self._curr and not self._match(TokenType.R_PAREN): 2403 if self._match_text_seq("FILTER_COLUMN", "="): 2404 prop.set("filter_column", self._parse_column()) 2405 elif self._match_text_seq("RETENTION_PERIOD", "="): 2406 prop.set("retention_period", self._parse_retention_period()) 2407 2408 self._match(TokenType.COMMA) 2409 2410 return prop 2411 2412 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2413 kind = "HASH" 2414 expressions: t.Optional[t.List[exp.Expression]] = None 2415 if self._match_text_seq("BY", "HASH"): 2416 expressions = self._parse_wrapped_csv(self._parse_id_var) 2417 elif self._match_text_seq("BY", "RANDOM"): 2418 kind = "RANDOM" 2419 2420 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2421 
buckets: t.Optional[exp.Expression] = None 2422 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2423 buckets = self._parse_number() 2424 2425 return self.expression( 2426 exp.DistributedByProperty, 2427 expressions=expressions, 2428 kind=kind, 2429 buckets=buckets, 2430 order=self._parse_order(), 2431 ) 2432 2433 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2434 self._match_text_seq("KEY") 2435 expressions = self._parse_wrapped_id_vars() 2436 return self.expression(expr_type, expressions=expressions) 2437 2438 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2439 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2440 prop = self._parse_system_versioning_property(with_=True) 2441 self._match_r_paren() 2442 return prop 2443 2444 if self._match(TokenType.L_PAREN, advance=False): 2445 return self._parse_wrapped_properties() 2446 2447 if self._match_text_seq("JOURNAL"): 2448 return self._parse_withjournaltable() 2449 2450 if self._match_texts(self.VIEW_ATTRIBUTES): 2451 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2452 2453 if self._match_text_seq("DATA"): 2454 return self._parse_withdata(no=False) 2455 elif self._match_text_seq("NO", "DATA"): 2456 return self._parse_withdata(no=True) 2457 2458 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2459 return self._parse_serde_properties(with_=True) 2460 2461 if self._match(TokenType.SCHEMA): 2462 return self.expression( 2463 exp.WithSchemaBindingProperty, 2464 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2465 ) 2466 2467 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2468 return self.expression( 2469 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2470 ) 2471 2472 if not self._next: 2473 return None 2474 2475 return self._parse_withisolatedloading() 2476 2477 def _parse_procedure_option(self) -> exp.Expression | None: 2478 if self._match_text_seq("EXECUTE", "AS"): 2479 return self.expression( 2480 exp.ExecuteAsProperty, 2481 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2482 or self._parse_string(), 2483 ) 2484 2485 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2486 2487 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2488 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2489 self._match(TokenType.EQ) 2490 2491 user = self._parse_id_var() 2492 self._match(TokenType.PARAMETER) 2493 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2494 2495 if not user or not host: 2496 return None 2497 2498 return exp.DefinerProperty(this=f"{user}@{host}") 2499 2500 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2501 self._match(TokenType.TABLE) 2502 self._match(TokenType.EQ) 2503 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2504 2505 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2506 return self.expression(exp.LogProperty, no=no) 2507 2508 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2509 return self.expression(exp.JournalProperty, **kwargs) 2510 2511 def _parse_checksum(self) -> exp.ChecksumProperty: 2512 self._match(TokenType.EQ) 2513 2514 on = None 2515 if self._match(TokenType.ON): 2516 on = True 2517 elif self._match_text_seq("OFF"): 2518 on = False 2519 2520 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2521 2522 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2523 return self.expression( 2524 exp.Cluster, 2525 expressions=( 2526 self._parse_wrapped_csv(self._parse_ordered) 2527 if wrapped 2528 else self._parse_csv(self._parse_ordered) 2529 ), 2530 ) 2531 2532 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2533 self._match_text_seq("BY") 2534 2535 self._match_l_paren() 2536 expressions = self._parse_csv(self._parse_column) 2537 self._match_r_paren() 2538 2539 if self._match_text_seq("SORTED", "BY"): 2540 self._match_l_paren() 2541 sorted_by = self._parse_csv(self._parse_ordered) 2542 self._match_r_paren() 2543 else: 2544 sorted_by = None 2545 2546 self._match(TokenType.INTO) 2547 buckets = self._parse_number() 2548 self._match_text_seq("BUCKETS") 2549 2550 return self.expression( 2551 exp.ClusteredByProperty, 2552 expressions=expressions, 2553 sorted_by=sorted_by, 2554 buckets=buckets, 2555 ) 2556 2557 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2558 if not self._match_text_seq("GRANTS"): 2559 self._retreat(self._index - 1) 2560 return None 2561 2562 return self.expression(exp.CopyGrantsProperty) 2563 2564 def _parse_freespace(self) -> exp.FreespaceProperty: 2565 self._match(TokenType.EQ) 2566 return self.expression( 2567 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2568 ) 2569 2570 def _parse_mergeblockratio( 2571 self, no: bool = False, default: bool = False 2572 ) -> exp.MergeBlockRatioProperty: 2573 if self._match(TokenType.EQ): 2574 return self.expression( 2575 exp.MergeBlockRatioProperty, 2576 this=self._parse_number(), 2577 percent=self._match(TokenType.PERCENT), 2578 ) 2579 2580 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2581 2582 def _parse_datablocksize( 2583 self, 2584 default: t.Optional[bool] = None, 2585 minimum: t.Optional[bool] = None, 2586 maximum: t.Optional[bool] = None, 2587 ) -> exp.DataBlocksizeProperty: 2588 self._match(TokenType.EQ) 2589 size = self._parse_number() 2590 2591 units = None 2592 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2593 units = self._prev.text 2594 2595 return self.expression( 2596 exp.DataBlocksizeProperty, 2597 size=size, 2598 units=units, 2599 default=default, 2600 minimum=minimum, 2601 maximum=maximum, 2602 ) 2603 2604 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2605 self._match(TokenType.EQ) 2606 always = self._match_text_seq("ALWAYS") 2607 manual = self._match_text_seq("MANUAL") 2608 never = self._match_text_seq("NEVER") 2609 default = self._match_text_seq("DEFAULT") 2610 2611 autotemp = None 2612 if self._match_text_seq("AUTOTEMP"): 2613 autotemp = self._parse_schema() 2614 2615 return self.expression( 2616 exp.BlockCompressionProperty, 2617 always=always, 2618 manual=manual, 2619 never=never, 2620 default=default, 2621 autotemp=autotemp, 2622 ) 2623 2624 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2625 index = self._index 2626 no = self._match_text_seq("NO") 2627 concurrent = self._match_text_seq("CONCURRENT") 2628 2629 if not self._match_text_seq("ISOLATED", "LOADING"): 2630 self._retreat(index) 2631 return None 2632 2633 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2634 return self.expression( 2635 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2636 ) 2637 2638 def _parse_locking(self) -> exp.LockingProperty: 2639 if self._match(TokenType.TABLE): 2640 kind = "TABLE" 2641 elif 
self._match(TokenType.VIEW): 2642 kind = "VIEW" 2643 elif self._match(TokenType.ROW): 2644 kind = "ROW" 2645 elif self._match_text_seq("DATABASE"): 2646 kind = "DATABASE" 2647 else: 2648 kind = None 2649 2650 if kind in ("DATABASE", "TABLE", "VIEW"): 2651 this = self._parse_table_parts() 2652 else: 2653 this = None 2654 2655 if self._match(TokenType.FOR): 2656 for_or_in = "FOR" 2657 elif self._match(TokenType.IN): 2658 for_or_in = "IN" 2659 else: 2660 for_or_in = None 2661 2662 if self._match_text_seq("ACCESS"): 2663 lock_type = "ACCESS" 2664 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2665 lock_type = "EXCLUSIVE" 2666 elif self._match_text_seq("SHARE"): 2667 lock_type = "SHARE" 2668 elif self._match_text_seq("READ"): 2669 lock_type = "READ" 2670 elif self._match_text_seq("WRITE"): 2671 lock_type = "WRITE" 2672 elif self._match_text_seq("CHECKSUM"): 2673 lock_type = "CHECKSUM" 2674 else: 2675 lock_type = None 2676 2677 override = self._match_text_seq("OVERRIDE") 2678 2679 return self.expression( 2680 exp.LockingProperty, 2681 this=this, 2682 kind=kind, 2683 for_or_in=for_or_in, 2684 lock_type=lock_type, 2685 override=override, 2686 ) 2687 2688 def _parse_partition_by(self) -> t.List[exp.Expression]: 2689 if self._match(TokenType.PARTITION_BY): 2690 return self._parse_csv(self._parse_assignment) 2691 return [] 2692 2693 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2694 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2695 if self._match_text_seq("MINVALUE"): 2696 return exp.var("MINVALUE") 2697 if self._match_text_seq("MAXVALUE"): 2698 return exp.var("MAXVALUE") 2699 return self._parse_bitwise() 2700 2701 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2702 expression = None 2703 from_expressions = None 2704 to_expressions = None 2705 2706 if self._match(TokenType.IN): 2707 this = self._parse_wrapped_csv(self._parse_bitwise) 2708 elif self._match(TokenType.FROM): 2709 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2710 self._match_text_seq("TO") 2711 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2712 elif self._match_text_seq("WITH", "(", "MODULUS"): 2713 this = self._parse_number() 2714 self._match_text_seq(",", "REMAINDER") 2715 expression = self._parse_number() 2716 self._match_r_paren() 2717 else: 2718 self.raise_error("Failed to parse partition bound spec.") 2719 2720 return self.expression( 2721 exp.PartitionBoundSpec, 2722 this=this, 2723 expression=expression, 2724 from_expressions=from_expressions, 2725 to_expressions=to_expressions, 2726 ) 2727 2728 # https://www.postgresql.org/docs/current/sql-createtable.html 2729 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2730 if not self._match_text_seq("OF"): 2731 self._retreat(self._index - 1) 2732 return None 2733 2734 this = self._parse_table(schema=True) 2735 2736 if self._match(TokenType.DEFAULT): 2737 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2738 elif self._match_text_seq("FOR", "VALUES"): 2739 expression = self._parse_partition_bound_spec() 2740 else: 2741 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2742 2743 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2744 2745 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2746 self._match(TokenType.EQ) 2747 return self.expression( 2748 exp.PartitionedByProperty, 2749 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2750 ) 2751 2752 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2753 if self._match_text_seq("AND", "STATISTICS"): 2754 statistics = True 2755 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2756 statistics = False 2757 else: 2758 statistics = None 2759 2760 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2761 2762 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2763 if self._match_text_seq("SQL"): 2764 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2765 return None 2766 2767 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2768 if self._match_text_seq("SQL", "DATA"): 2769 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2770 return None 2771 2772 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2773 if self._match_text_seq("PRIMARY", "INDEX"): 2774 return exp.NoPrimaryIndexProperty() 2775 if self._match_text_seq("SQL"): 2776 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2777 return None 2778 2779 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2780 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2781 return exp.OnCommitProperty() 2782 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2783 return exp.OnCommitProperty(delete=True) 2784 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2785 2786 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2787 if self._match_text_seq("SQL", "DATA"): 2788 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2789 return None 2790 2791 def _parse_distkey(self) -> exp.DistKeyProperty: 2792 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2793 2794 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2795 table = self._parse_table(schema=True) 2796 2797 options = [] 2798 while self._match_texts(("INCLUDING", "EXCLUDING")): 2799 this = self._prev.text.upper() 2800 2801 id_var = self._parse_id_var() 2802 if not id_var: 2803 return None 2804 2805 options.append( 2806 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2807 ) 2808 2809 return self.expression(exp.LikeProperty, this=table, expressions=options) 2810 2811 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2812 return self.expression( 2813 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2814 ) 2815 2816 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2817 self._match(TokenType.EQ) 2818 return self.expression( 2819 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2820 ) 2821 2822 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2823 self._match_text_seq("WITH", "CONNECTION") 2824 return self.expression( 2825 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2826 ) 2827 2828 def _parse_returns(self) -> exp.ReturnsProperty: 2829 value: t.Optional[exp.Expression] 2830 null = None 2831 is_table = self._match(TokenType.TABLE) 2832 2833 if is_table: 2834 if self._match(TokenType.LT): 2835 value = self.expression( 2836 exp.Schema, 2837 this="TABLE", 2838 expressions=self._parse_csv(self._parse_struct_types), 2839 ) 2840 if not self._match(TokenType.GT): 2841 self.raise_error("Expecting >") 2842 else: 2843 value = self._parse_schema(exp.var("TABLE")) 2844 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
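# e.g. the `RETURNS NULL ON NULL INPUT` null-call clause: in this form there is no return type to
# parse, so only the null-input behavior is recorded on the property.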
2845 null = True 2846 value = None 2847 else: 2848 value = self._parse_types() 2849 2850 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2851 2852 def _parse_describe(self) -> exp.Describe: 2853 kind = self._match_set(self.CREATABLES) and self._prev.text 2854 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2855 if self._match(TokenType.DOT): 2856 style = None 2857 self._retreat(self._index - 2) 2858 2859 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2860 2861 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2862 this = self._parse_statement() 2863 else: 2864 this = self._parse_table(schema=True) 2865 2866 properties = self._parse_properties() 2867 expressions = properties.expressions if properties else None 2868 partition = self._parse_partition() 2869 return self.expression( 2870 exp.Describe, 2871 this=this, 2872 style=style, 2873 kind=kind, 2874 expressions=expressions, 2875 partition=partition, 2876 format=format, 2877 ) 2878 2879 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2880 kind = self._prev.text.upper() 2881 expressions = [] 2882 2883 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2884 if self._match(TokenType.WHEN): 2885 expression = self._parse_disjunction() 2886 self._match(TokenType.THEN) 2887 else: 2888 expression = None 2889 2890 else_ = self._match(TokenType.ELSE) 2891 2892 if not self._match(TokenType.INTO): 2893 return None 2894 2895 return self.expression( 2896 exp.ConditionalInsert, 2897 this=self.expression( 2898 exp.Insert, 2899 this=self._parse_table(schema=True), 2900 expression=self._parse_derived_table_values(), 2901 ), 2902 expression=expression, 2903 else_=else_, 2904 ) 2905 2906 expression = parse_conditional_insert() 2907 while expression is not None: 2908 expressions.append(expression) 2909 expression = parse_conditional_insert() 2910 2911 return self.expression( 2912 exp.MultitableInserts, 2913 kind=kind, 2914 comments=comments, 2915 expressions=expressions, 2916 source=self._parse_table(), 2917 ) 2918 2919 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2920 comments = [] 2921 hint = self._parse_hint() 2922 overwrite = self._match(TokenType.OVERWRITE) 2923 ignore = self._match(TokenType.IGNORE) 2924 local = self._match_text_seq("LOCAL") 2925 alternative = None 2926 is_function = None 2927 2928 if self._match_text_seq("DIRECTORY"): 2929 this: t.Optional[exp.Expression] = self.expression( 2930 exp.Directory, 2931 this=self._parse_var_or_string(), 2932 local=local, 2933 row_format=self._parse_row_format(match_row=True), 2934 ) 2935 else: 2936 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2937 comments += ensure_list(self._prev_comments) 2938 return self._parse_multitable_inserts(comments) 2939 2940 if self._match(TokenType.OR): 2941 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2942 2943 self._match(TokenType.INTO) 2944 comments += ensure_list(self._prev_comments) 2945 self._match(TokenType.TABLE) 2946 is_function = self._match(TokenType.FUNCTION) 2947 2948 this = ( 2949 self._parse_table(schema=True, parse_partition=True) 2950 if not is_function 2951 else self._parse_function() 2952 ) 2953 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2954 this.set("alias", self._parse_table_alias()) 2955 2956 returning = self._parse_returning() 2957 2958 return self.expression( 2959 
exp.Insert, 2960 comments=comments, 2961 hint=hint, 2962 is_function=is_function, 2963 this=this, 2964 stored=self._match_text_seq("STORED") and self._parse_stored(), 2965 by_name=self._match_text_seq("BY", "NAME"), 2966 exists=self._parse_exists(), 2967 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2968 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2969 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2970 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2971 conflict=self._parse_on_conflict(), 2972 returning=returning or self._parse_returning(), 2973 overwrite=overwrite, 2974 alternative=alternative, 2975 ignore=ignore, 2976 source=self._match(TokenType.TABLE) and self._parse_table(), 2977 ) 2978 2979 def _parse_kill(self) -> exp.Kill: 2980 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2981 2982 return self.expression( 2983 exp.Kill, 2984 this=self._parse_primary(), 2985 kind=kind, 2986 ) 2987 2988 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2989 conflict = self._match_text_seq("ON", "CONFLICT") 2990 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2991 2992 if not conflict and not duplicate: 2993 return None 2994 2995 conflict_keys = None 2996 constraint = None 2997 2998 if conflict: 2999 if self._match_text_seq("ON", "CONSTRAINT"): 3000 constraint = self._parse_id_var() 3001 elif self._match(TokenType.L_PAREN): 3002 conflict_keys = self._parse_csv(self._parse_id_var) 3003 self._match_r_paren() 3004 3005 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3006 if self._prev.token_type == TokenType.UPDATE: 3007 self._match(TokenType.SET) 3008 expressions = self._parse_csv(self._parse_equality) 3009 else: 3010 expressions = None 3011 3012 return self.expression( 3013 exp.OnConflict, 3014 duplicate=duplicate, 3015 expressions=expressions, 3016 action=action, 3017 conflict_keys=conflict_keys, 3018 constraint=constraint, 3019 where=self._parse_where(), 3020 ) 3021 3022 def _parse_returning(self) -> t.Optional[exp.Returning]: 3023 if not self._match(TokenType.RETURNING): 3024 return None 3025 return self.expression( 3026 exp.Returning, 3027 expressions=self._parse_csv(self._parse_expression), 3028 into=self._match(TokenType.INTO) and self._parse_table_part(), 3029 ) 3030 3031 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3032 if not self._match(TokenType.FORMAT): 3033 return None 3034 return self._parse_row_format() 3035 3036 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3037 index = self._index 3038 with_ = with_ or self._match_text_seq("WITH") 3039 3040 if not self._match(TokenType.SERDE_PROPERTIES): 3041 self._retreat(index) 3042 return None 3043 return self.expression( 3044 exp.SerdeProperties, 3045 **{ # type: ignore 3046 "expressions": self._parse_wrapped_properties(), 3047 "with": with_, 3048 }, 3049 ) 3050 3051 def _parse_row_format( 3052 self, match_row: bool = False 3053 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3054 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3055 return None 3056 3057 if self._match_text_seq("SERDE"): 3058 this = self._parse_string() 3059 3060 serde_properties = self._parse_serde_properties() 3061 3062 return self.expression( 3063 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3064 ) 3065 3066 self._match_text_seq("DELIMITED") 3067 3068 kwargs = {} 3069 3070 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3071 kwargs["fields"] = self._parse_string() 3072 if self._match_text_seq("ESCAPED", "BY"): 3073 kwargs["escaped"] = self._parse_string() 3074 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3075 kwargs["collection_items"] = self._parse_string() 3076 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3077 kwargs["map_keys"] = self._parse_string() 3078 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3079 kwargs["lines"] = self._parse_string() 3080 if self._match_text_seq("NULL", "DEFINED", "AS"): 3081 kwargs["null"] = self._parse_string() 3082 3083 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3084 3085 def _parse_load(self) -> exp.LoadData | exp.Command: 3086 if self._match_text_seq("DATA"): 3087 local = self._match_text_seq("LOCAL") 3088 self._match_text_seq("INPATH") 3089 inpath = self._parse_string() 3090 overwrite = self._match(TokenType.OVERWRITE) 3091 self._match_pair(TokenType.INTO, TokenType.TABLE) 3092 3093 return self.expression( 3094 exp.LoadData, 3095 this=self._parse_table(schema=True), 3096 local=local, 3097 overwrite=overwrite, 3098 inpath=inpath, 3099 partition=self._parse_partition(), 3100 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3101 serde=self._match_text_seq("SERDE") and self._parse_string(), 3102 ) 3103 return self._parse_as_command(self._prev) 3104 3105 def _parse_delete(self) -> exp.Delete: 3106 # This handles MySQL's "Multiple-Table Syntax" 3107 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3108 tables = None 3109 if not self._match(TokenType.FROM, advance=False): 3110 tables = self._parse_csv(self._parse_table) or None 3111 3112 returning = self._parse_returning() 3113 3114 return self.expression( 3115 exp.Delete, 3116 tables=tables, 3117 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3118 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3119 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3120 where=self._parse_where(), 3121 returning=returning or self._parse_returning(), 3122 limit=self._parse_limit(), 3123 ) 3124 3125 def _parse_update(self) -> exp.Update: 3126 kwargs: t.Dict[str, t.Any] = { 3127 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3128 } 3129 while self._curr: 3130 if self._match(TokenType.SET): 3131 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3132 elif self._match(TokenType.RETURNING, advance=False): 3133 kwargs["returning"] = self._parse_returning() 3134 elif self._match(TokenType.FROM, advance=False): 3135 kwargs["from"] = self._parse_from(joins=True) 3136 elif self._match(TokenType.WHERE, advance=False): 3137 kwargs["where"] = self._parse_where() 3138 elif self._match(TokenType.ORDER_BY, advance=False): 3139 kwargs["order"] = self._parse_order() 3140 elif self._match(TokenType.LIMIT, advance=False): 3141 kwargs["limit"] = self._parse_limit() 3142 else: 3143 break 3144 3145 return self.expression(exp.Update, **kwargs) 3146 3147 def _parse_use(self) -> exp.Use: 3148 return self.expression( 3149 exp.Use, 3150 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3151 this=self._parse_table(schema=False), 3152 ) 3153 3154 def _parse_uncache(self) -> exp.Uncache: 3155 if not self._match(TokenType.TABLE): 3156 self.raise_error("Expecting TABLE after 
UNCACHE") 3157 3158 return self.expression( 3159 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3160 ) 3161 3162 def _parse_cache(self) -> exp.Cache: 3163 lazy = self._match_text_seq("LAZY") 3164 self._match(TokenType.TABLE) 3165 table = self._parse_table(schema=True) 3166 3167 options = [] 3168 if self._match_text_seq("OPTIONS"): 3169 self._match_l_paren() 3170 k = self._parse_string() 3171 self._match(TokenType.EQ) 3172 v = self._parse_string() 3173 options = [k, v] 3174 self._match_r_paren() 3175 3176 self._match(TokenType.ALIAS) 3177 return self.expression( 3178 exp.Cache, 3179 this=table, 3180 lazy=lazy, 3181 options=options, 3182 expression=self._parse_select(nested=True), 3183 ) 3184 3185 def _parse_partition(self) -> t.Optional[exp.Partition]: 3186 if not self._match_texts(self.PARTITION_KEYWORDS): 3187 return None 3188 3189 return self.expression( 3190 exp.Partition, 3191 subpartition=self._prev.text.upper() == "SUBPARTITION", 3192 expressions=self._parse_wrapped_csv(self._parse_assignment), 3193 ) 3194 3195 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3196 def _parse_value_expression() -> t.Optional[exp.Expression]: 3197 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3198 return exp.var(self._prev.text.upper()) 3199 return self._parse_expression() 3200 3201 if self._match(TokenType.L_PAREN): 3202 expressions = self._parse_csv(_parse_value_expression) 3203 self._match_r_paren() 3204 return self.expression(exp.Tuple, expressions=expressions) 3205 3206 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3207 expression = self._parse_expression() 3208 if expression: 3209 return self.expression(exp.Tuple, expressions=[expression]) 3210 return None 3211 3212 def _parse_projections(self) -> t.List[exp.Expression]: 3213 return self._parse_expressions() 3214 3215 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3216 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3217 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3218 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3219 ) 3220 elif self._match(TokenType.FROM): 3221 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3222 # Support parentheses for duckdb FROM-first syntax 3223 select = self._parse_select(from_=from_) 3224 if select: 3225 if not select.args.get("from"): 3226 select.set("from", from_) 3227 this = select 3228 else: 3229 this = exp.select("*").from_(t.cast(exp.From, from_)) 3230 else: 3231 this = ( 3232 self._parse_table(consume_pipe=True) 3233 if table 3234 else self._parse_select(nested=True, parse_set_operation=False) 3235 ) 3236 3237 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3238 # in case a modifier (e.g. 
join) is following 3239 if table and isinstance(this, exp.Values) and this.alias: 3240 alias = this.args["alias"].pop() 3241 this = exp.Table(this=this, alias=alias) 3242 3243 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3244 3245 return this 3246 3247 def _parse_select( 3248 self, 3249 nested: bool = False, 3250 table: bool = False, 3251 parse_subquery_alias: bool = True, 3252 parse_set_operation: bool = True, 3253 consume_pipe: bool = True, 3254 from_: t.Optional[exp.From] = None, 3255 ) -> t.Optional[exp.Expression]: 3256 query = self._parse_select_query( 3257 nested=nested, 3258 table=table, 3259 parse_subquery_alias=parse_subquery_alias, 3260 parse_set_operation=parse_set_operation, 3261 ) 3262 3263 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3264 if not query and from_: 3265 query = exp.select("*").from_(from_) 3266 if isinstance(query, exp.Query): 3267 query = self._parse_pipe_syntax_query(query) 3268 query = query.subquery(copy=False) if query and table else query 3269 3270 return query 3271 3272 def _parse_select_query( 3273 self, 3274 nested: bool = False, 3275 table: bool = False, 3276 parse_subquery_alias: bool = True, 3277 parse_set_operation: bool = True, 3278 ) -> t.Optional[exp.Expression]: 3279 cte = self._parse_with() 3280 3281 if cte: 3282 this = self._parse_statement() 3283 3284 if not this: 3285 self.raise_error("Failed to parse any statement following CTE") 3286 return cte 3287 3288 if "with" in this.arg_types: 3289 this.set("with", cte) 3290 else: 3291 self.raise_error(f"{this.key} does not support CTE") 3292 this = cte 3293 3294 return this 3295 3296 # duckdb supports leading with FROM x 3297 from_ = ( 3298 self._parse_from(consume_pipe=True) 3299 if self._match(TokenType.FROM, advance=False) 3300 else None 3301 ) 3302 3303 if self._match(TokenType.SELECT): 3304 comments = self._prev_comments 3305 3306 hint = self._parse_hint() 3307 3308 if self._next and not self._next.token_type == TokenType.DOT: 3309 all_ = self._match(TokenType.ALL) 3310 distinct = self._match_set(self.DISTINCT_TOKENS) 3311 else: 3312 all_, distinct = None, None 3313 3314 kind = ( 3315 self._match(TokenType.ALIAS) 3316 and self._match_texts(("STRUCT", "VALUE")) 3317 and self._prev.text.upper() 3318 ) 3319 3320 if distinct: 3321 distinct = self.expression( 3322 exp.Distinct, 3323 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3324 ) 3325 3326 if all_ and distinct: 3327 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3328 3329 operation_modifiers = [] 3330 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3331 operation_modifiers.append(exp.var(self._prev.text.upper())) 3332 3333 limit = self._parse_limit(top=True) 3334 projections = self._parse_projections() 3335 3336 this = self.expression( 3337 exp.Select, 3338 kind=kind, 3339 hint=hint, 3340 distinct=distinct, 3341 expressions=projections, 3342 limit=limit, 3343 operation_modifiers=operation_modifiers or None, 3344 ) 3345 this.comments = comments 3346 3347 into = self._parse_into() 3348 if into: 3349 this.set("into", into) 3350 3351 if not from_: 3352 from_ = self._parse_from() 3353 3354 if from_: 3355 this.set("from", from_) 3356 3357 this = self._parse_query_modifiers(this) 3358 elif (table or nested) and self._match(TokenType.L_PAREN): 3359 this = self._parse_wrapped_select(table=table) 3360 3361 # We return early here so that the UNION isn't attached to the subquery by the 3362 # following call to _parse_set_operations, but 
instead becomes the parent node 3363 self._match_r_paren() 3364 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3365 elif self._match(TokenType.VALUES, advance=False): 3366 this = self._parse_derived_table_values() 3367 elif from_: 3368 this = exp.select("*").from_(from_.this, copy=False) 3369 elif self._match(TokenType.SUMMARIZE): 3370 table = self._match(TokenType.TABLE) 3371 this = self._parse_select() or self._parse_string() or self._parse_table() 3372 return self.expression(exp.Summarize, this=this, table=table) 3373 elif self._match(TokenType.DESCRIBE): 3374 this = self._parse_describe() 3375 elif self._match_text_seq("STREAM"): 3376 this = self._parse_function() 3377 if this: 3378 this = self.expression(exp.Stream, this=this) 3379 else: 3380 self._retreat(self._index - 1) 3381 else: 3382 this = None 3383 3384 return self._parse_set_operations(this) if parse_set_operation else this 3385 3386 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3387 self._match_text_seq("SEARCH") 3388 3389 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3390 3391 if not kind: 3392 return None 3393 3394 self._match_text_seq("FIRST", "BY") 3395 3396 return self.expression( 3397 exp.RecursiveWithSearch, 3398 kind=kind, 3399 this=self._parse_id_var(), 3400 expression=self._match_text_seq("SET") and self._parse_id_var(), 3401 using=self._match_text_seq("USING") and self._parse_id_var(), 3402 ) 3403 3404 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3405 if not skip_with_token and not self._match(TokenType.WITH): 3406 return None 3407 3408 comments = self._prev_comments 3409 recursive = self._match(TokenType.RECURSIVE) 3410 3411 last_comments = None 3412 expressions = [] 3413 while True: 3414 cte = self._parse_cte() 3415 if isinstance(cte, exp.CTE): 3416 expressions.append(cte) 3417 if last_comments: 3418 cte.add_comments(last_comments) 3419 3420 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3421 break 3422 else: 3423 self._match(TokenType.WITH) 3424 3425 last_comments = self._prev_comments 3426 3427 return self.expression( 3428 exp.With, 3429 comments=comments, 3430 expressions=expressions, 3431 recursive=recursive, 3432 search=self._parse_recursive_with_search(), 3433 ) 3434 3435 def _parse_cte(self) -> t.Optional[exp.CTE]: 3436 index = self._index 3437 3438 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3439 if not alias or not alias.this: 3440 self.raise_error("Expected CTE to have alias") 3441 3442 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3443 self._retreat(index) 3444 return None 3445 3446 comments = self._prev_comments 3447 3448 if self._match_text_seq("NOT", "MATERIALIZED"): 3449 materialized = False 3450 elif self._match_text_seq("MATERIALIZED"): 3451 materialized = True 3452 else: 3453 materialized = None 3454 3455 cte = self.expression( 3456 exp.CTE, 3457 this=self._parse_wrapped(self._parse_statement), 3458 alias=alias, 3459 materialized=materialized, 3460 comments=comments, 3461 ) 3462 3463 values = cte.this 3464 if isinstance(values, exp.Values): 3465 if values.alias: 3466 cte.set("this", exp.select("*").from_(values)) 3467 else: 3468 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3469 3470 return cte 3471 3472 def _parse_table_alias( 3473 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3474 ) -> t.Optional[exp.TableAlias]: 3475 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3476 # so this section tries to parse the clause version and if it fails, it treats the token 3477 # as an identifier (alias) 3478 if self._can_parse_limit_or_offset(): 3479 return None 3480 3481 any_token = self._match(TokenType.ALIAS) 3482 alias = ( 3483 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3484 or self._parse_string_as_identifier() 3485 ) 3486 3487 index = self._index 3488 if self._match(TokenType.L_PAREN): 3489 columns = self._parse_csv(self._parse_function_parameter) 3490 self._match_r_paren() if columns else self._retreat(index) 3491 else: 3492 columns = None 3493 3494 if not alias and not columns: 3495 return None 3496 3497 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3498 3499 # We bubble up comments from the Identifier to the TableAlias 3500 if isinstance(alias, exp.Identifier): 3501 table_alias.add_comments(alias.pop_comments()) 3502 3503 return table_alias 3504 3505 def _parse_subquery( 3506 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3507 ) -> t.Optional[exp.Subquery]: 3508 if not this: 3509 return None 3510 3511 return self.expression( 3512 exp.Subquery, 3513 this=this, 3514 pivots=self._parse_pivots(), 3515 alias=self._parse_table_alias() if parse_alias else None, 3516 sample=self._parse_table_sample(), 3517 ) 3518 3519 def _implicit_unnests_to_explicit(self, this: E) -> E: 3520 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3521 3522 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3523 for i, join in enumerate(this.args.get("joins") or []): 3524 table = join.this 3525 normalized_table = table.copy() 3526 normalized_table.meta["maybe_column"] = True 3527 normalized_table = _norm(normalized_table, dialect=self.dialect) 3528 3529 if isinstance(table, exp.Table) and not join.args.get("on"): 3530 if normalized_table.parts[0].name in refs: 3531 table_as_column = table.to_column() 3532 unnest = exp.Unnest(expressions=[table_as_column]) 3533 3534 # Table.to_column creates a parent Alias node that we want to convert to 3535 # a TableAlias and attach to the Unnest, so it matches the parser's output 3536 if isinstance(table.args.get("alias"), exp.TableAlias): 3537 table_as_column.replace(table_as_column.this) 3538 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3539 3540 table.replace(unnest) 3541 3542 refs.add(normalized_table.alias_or_name) 3543 3544 return this 3545 3546 def _parse_query_modifiers( 3547 self, this: t.Optional[exp.Expression] 3548 ) -> t.Optional[exp.Expression]: 3549 if isinstance(this, self.MODIFIABLES): 3550 for join in self._parse_joins(): 3551 this.append("joins", join) 3552 for lateral in iter(self._parse_lateral, None): 3553 this.append("laterals", lateral) 3554 3555 while True: 3556 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3557 modifier_token = self._curr 3558 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3559 key, expression = parser(self) 3560 3561 if expression: 3562 if this.args.get(key): 3563 self.raise_error( 3564 f"Found multiple '{modifier_token.text.upper()}' clauses", 3565 token=modifier_token, 3566 ) 3567 3568 this.set(key, expression) 3569 if key == "limit": 3570 offset = expression.args.pop("offset", None) 3571 3572 if offset: 3573 offset = exp.Offset(expression=offset) 3574 this.set("offset", offset) 3575 3576 limit_by_expressions = expression.expressions 3577 
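# ---------------------------------------------------------------------------
# Illustrative annotation, not part of the original module: a minimal sketch of
# the offset-splitting step above, assuming sqlglot's public parse_one helper
# and the Expression.sql / Expression.args conveniences. When a dialect writes
# the offset inside the LIMIT clause, the modifier loop pops it out into a
# standalone exp.Offset node on the enclosing query.
#
#     import sqlglot
#
#     query = sqlglot.parse_one("SELECT x FROM t LIMIT 5, 10", read="mysql")
#     query.args["limit"]   # exp.Limit carrying the row count (10)
#     query.args["offset"]  # exp.Offset split out of the LIMIT clause (5)
#     query.sql()           # roughly "SELECT x FROM t LIMIT 10 OFFSET 5"
# ---------------------------------------------------------------------------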
expression.set("expressions", None) 3578 offset.set("expressions", limit_by_expressions) 3579 continue 3580 break 3581 3582 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3583 this = self._implicit_unnests_to_explicit(this) 3584 3585 return this 3586 3587 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3588 start = self._curr 3589 while self._curr: 3590 self._advance() 3591 3592 end = self._tokens[self._index - 1] 3593 return exp.Hint(expressions=[self._find_sql(start, end)]) 3594 3595 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3596 return self._parse_function_call() 3597 3598 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3599 start_index = self._index 3600 should_fallback_to_string = False 3601 3602 hints = [] 3603 try: 3604 for hint in iter( 3605 lambda: self._parse_csv( 3606 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3607 ), 3608 [], 3609 ): 3610 hints.extend(hint) 3611 except ParseError: 3612 should_fallback_to_string = True 3613 3614 if should_fallback_to_string or self._curr: 3615 self._retreat(start_index) 3616 return self._parse_hint_fallback_to_string() 3617 3618 return self.expression(exp.Hint, expressions=hints) 3619 3620 def _parse_hint(self) -> t.Optional[exp.Hint]: 3621 if self._match(TokenType.HINT) and self._prev_comments: 3622 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3623 3624 return None 3625 3626 def _parse_into(self) -> t.Optional[exp.Into]: 3627 if not self._match(TokenType.INTO): 3628 return None 3629 3630 temp = self._match(TokenType.TEMPORARY) 3631 unlogged = self._match_text_seq("UNLOGGED") 3632 self._match(TokenType.TABLE) 3633 3634 return self.expression( 3635 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3636 ) 3637 3638 def _parse_from( 3639 self, 3640 joins: bool = False, 3641 skip_from_token: bool = False, 3642 consume_pipe: bool = False, 3643 ) -> t.Optional[exp.From]: 3644 if not skip_from_token and not self._match(TokenType.FROM): 3645 return None 3646 3647 return self.expression( 3648 exp.From, 3649 comments=self._prev_comments, 3650 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3651 ) 3652 3653 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3654 return self.expression( 3655 exp.MatchRecognizeMeasure, 3656 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3657 this=self._parse_expression(), 3658 ) 3659 3660 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3661 if not self._match(TokenType.MATCH_RECOGNIZE): 3662 return None 3663 3664 self._match_l_paren() 3665 3666 partition = self._parse_partition_by() 3667 order = self._parse_order() 3668 3669 measures = ( 3670 self._parse_csv(self._parse_match_recognize_measure) 3671 if self._match_text_seq("MEASURES") 3672 else None 3673 ) 3674 3675 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3676 rows = exp.var("ONE ROW PER MATCH") 3677 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3678 text = "ALL ROWS PER MATCH" 3679 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3680 text += " SHOW EMPTY MATCHES" 3681 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3682 text += " OMIT EMPTY MATCHES" 3683 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3684 text += " WITH UNMATCHED ROWS" 3685 rows = exp.var(text) 3686 else: 3687 rows = None 3688 3689 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3690 text = "AFTER 
MATCH SKIP" 3691 if self._match_text_seq("PAST", "LAST", "ROW"): 3692 text += " PAST LAST ROW" 3693 elif self._match_text_seq("TO", "NEXT", "ROW"): 3694 text += " TO NEXT ROW" 3695 elif self._match_text_seq("TO", "FIRST"): 3696 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3697 elif self._match_text_seq("TO", "LAST"): 3698 text += f" TO LAST {self._advance_any().text}" # type: ignore 3699 after = exp.var(text) 3700 else: 3701 after = None 3702 3703 if self._match_text_seq("PATTERN"): 3704 self._match_l_paren() 3705 3706 if not self._curr: 3707 self.raise_error("Expecting )", self._curr) 3708 3709 paren = 1 3710 start = self._curr 3711 3712 while self._curr and paren > 0: 3713 if self._curr.token_type == TokenType.L_PAREN: 3714 paren += 1 3715 if self._curr.token_type == TokenType.R_PAREN: 3716 paren -= 1 3717 3718 end = self._prev 3719 self._advance() 3720 3721 if paren > 0: 3722 self.raise_error("Expecting )", self._curr) 3723 3724 pattern = exp.var(self._find_sql(start, end)) 3725 else: 3726 pattern = None 3727 3728 define = ( 3729 self._parse_csv(self._parse_name_as_expression) 3730 if self._match_text_seq("DEFINE") 3731 else None 3732 ) 3733 3734 self._match_r_paren() 3735 3736 return self.expression( 3737 exp.MatchRecognize, 3738 partition_by=partition, 3739 order=order, 3740 measures=measures, 3741 rows=rows, 3742 after=after, 3743 pattern=pattern, 3744 define=define, 3745 alias=self._parse_table_alias(), 3746 ) 3747 3748 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3749 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3750 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3751 cross_apply = False 3752 3753 if cross_apply is not None: 3754 this = self._parse_select(table=True) 3755 view = None 3756 outer = None 3757 elif self._match(TokenType.LATERAL): 3758 this = self._parse_select(table=True) 3759 view = self._match(TokenType.VIEW) 3760 outer = self._match(TokenType.OUTER) 3761 else: 3762 return None 3763 3764 if not this: 3765 this = ( 3766 self._parse_unnest() 3767 or self._parse_function() 3768 or self._parse_id_var(any_token=False) 3769 ) 3770 3771 while self._match(TokenType.DOT): 3772 this = exp.Dot( 3773 this=this, 3774 expression=self._parse_function() or self._parse_id_var(any_token=False), 3775 ) 3776 3777 ordinality: t.Optional[bool] = None 3778 3779 if view: 3780 table = self._parse_id_var(any_token=False) 3781 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3782 table_alias: t.Optional[exp.TableAlias] = self.expression( 3783 exp.TableAlias, this=table, columns=columns 3784 ) 3785 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3786 # We move the alias from the lateral's child node to the lateral itself 3787 table_alias = this.args["alias"].pop() 3788 else: 3789 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3790 table_alias = self._parse_table_alias() 3791 3792 return self.expression( 3793 exp.Lateral, 3794 this=this, 3795 view=view, 3796 outer=outer, 3797 alias=table_alias, 3798 cross_apply=cross_apply, 3799 ordinality=ordinality, 3800 ) 3801 3802 def _parse_join_parts( 3803 self, 3804 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3805 return ( 3806 self._match_set(self.JOIN_METHODS) and self._prev, 3807 self._match_set(self.JOIN_SIDES) and self._prev, 3808 self._match_set(self.JOIN_KINDS) and self._prev, 3809 ) 3810 3811 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3812 def 
_parse_column_as_identifier() -> t.Optional[exp.Expression]: 3813 this = self._parse_column() 3814 if isinstance(this, exp.Column): 3815 return this.this 3816 return this 3817 3818 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3819 3820 def _parse_join( 3821 self, skip_join_token: bool = False, parse_bracket: bool = False 3822 ) -> t.Optional[exp.Join]: 3823 if self._match(TokenType.COMMA): 3824 table = self._try_parse(self._parse_table) 3825 cross_join = self.expression(exp.Join, this=table) if table else None 3826 3827 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3828 cross_join.set("kind", "CROSS") 3829 3830 return cross_join 3831 3832 index = self._index 3833 method, side, kind = self._parse_join_parts() 3834 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3835 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3836 join_comments = self._prev_comments 3837 3838 if not skip_join_token and not join: 3839 self._retreat(index) 3840 kind = None 3841 method = None 3842 side = None 3843 3844 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3845 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3846 3847 if not skip_join_token and not join and not outer_apply and not cross_apply: 3848 return None 3849 3850 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3851 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3852 kwargs["expressions"] = self._parse_csv( 3853 lambda: self._parse_table(parse_bracket=parse_bracket) 3854 ) 3855 3856 if method: 3857 kwargs["method"] = method.text 3858 if side: 3859 kwargs["side"] = side.text 3860 if kind: 3861 kwargs["kind"] = kind.text 3862 if hint: 3863 kwargs["hint"] = hint 3864 3865 if self._match(TokenType.MATCH_CONDITION): 3866 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3867 3868 if self._match(TokenType.ON): 3869 kwargs["on"] = self._parse_assignment() 3870 elif self._match(TokenType.USING): 3871 kwargs["using"] = self._parse_using_identifiers() 3872 elif ( 3873 not method 3874 and not (outer_apply or cross_apply) 3875 and not isinstance(kwargs["this"], exp.Unnest) 3876 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3877 ): 3878 index = self._index 3879 joins: t.Optional[list] = list(self._parse_joins()) 3880 3881 if joins and self._match(TokenType.ON): 3882 kwargs["on"] = self._parse_assignment() 3883 elif joins and self._match(TokenType.USING): 3884 kwargs["using"] = self._parse_using_identifiers() 3885 else: 3886 joins = None 3887 self._retreat(index) 3888 3889 kwargs["this"].set("joins", joins if joins else None) 3890 3891 kwargs["pivots"] = self._parse_pivots() 3892 3893 comments = [c for token in (method, side, kind) if token for c in token.comments] 3894 comments = (join_comments or []) + comments 3895 3896 if ( 3897 self.ADD_JOIN_ON_TRUE 3898 and not kwargs.get("on") 3899 and not kwargs.get("using") 3900 and not kwargs.get("method") 3901 and kwargs.get("kind") in (None, "INNER", "OUTER") 3902 ): 3903 kwargs["on"] = exp.true() 3904 3905 return self.expression(exp.Join, comments=comments, **kwargs) 3906 3907 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3908 this = self._parse_assignment() 3909 3910 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3911 return this 3912 3913 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3914 return 
self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3915 3916 return this 3917 3918 def _parse_index_params(self) -> exp.IndexParameters: 3919 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3920 3921 if self._match(TokenType.L_PAREN, advance=False): 3922 columns = self._parse_wrapped_csv(self._parse_with_operator) 3923 else: 3924 columns = None 3925 3926 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3927 partition_by = self._parse_partition_by() 3928 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3929 tablespace = ( 3930 self._parse_var(any_token=True) 3931 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3932 else None 3933 ) 3934 where = self._parse_where() 3935 3936 on = self._parse_field() if self._match(TokenType.ON) else None 3937 3938 return self.expression( 3939 exp.IndexParameters, 3940 using=using, 3941 columns=columns, 3942 include=include, 3943 partition_by=partition_by, 3944 where=where, 3945 with_storage=with_storage, 3946 tablespace=tablespace, 3947 on=on, 3948 ) 3949 3950 def _parse_index( 3951 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3952 ) -> t.Optional[exp.Index]: 3953 if index or anonymous: 3954 unique = None 3955 primary = None 3956 amp = None 3957 3958 self._match(TokenType.ON) 3959 self._match(TokenType.TABLE) # hive 3960 table = self._parse_table_parts(schema=True) 3961 else: 3962 unique = self._match(TokenType.UNIQUE) 3963 primary = self._match_text_seq("PRIMARY") 3964 amp = self._match_text_seq("AMP") 3965 3966 if not self._match(TokenType.INDEX): 3967 return None 3968 3969 index = self._parse_id_var() 3970 table = None 3971 3972 params = self._parse_index_params() 3973 3974 return self.expression( 3975 exp.Index, 3976 this=index, 3977 table=table, 3978 unique=unique, 3979 primary=primary, 3980 amp=amp, 3981 params=params, 3982 ) 3983 3984 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3985 hints: t.List[exp.Expression] = [] 3986 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3987 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3988 hints.append( 3989 self.expression( 3990 exp.WithTableHint, 3991 expressions=self._parse_csv( 3992 lambda: self._parse_function() or self._parse_var(any_token=True) 3993 ), 3994 ) 3995 ) 3996 self._match_r_paren() 3997 else: 3998 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3999 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4000 hint = exp.IndexTableHint(this=self._prev.text.upper()) 4001 4002 self._match_set((TokenType.INDEX, TokenType.KEY)) 4003 if self._match(TokenType.FOR): 4004 hint.set("target", self._advance_any() and self._prev.text.upper()) 4005 4006 hint.set("expressions", self._parse_wrapped_id_vars()) 4007 hints.append(hint) 4008 4009 return hints or None 4010 4011 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4012 return ( 4013 (not schema and self._parse_function(optional_parens=False)) 4014 or self._parse_id_var(any_token=False) 4015 or self._parse_string_as_identifier() 4016 or self._parse_placeholder() 4017 ) 4018 4019 def _parse_table_parts( 4020 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4021 ) -> exp.Table: 4022 catalog = None 4023 db = None 4024 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4025 4026 while self._match(TokenType.DOT): 4027 
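# ---------------------------------------------------------------------------
# Illustrative annotation, not part of the original module: a hedged sketch of
# what the dotted-name loop below builds, assuming sqlglot's public parse_one
# helper, Expression.find and the exp.Table name/db/catalog properties.
#
#     import sqlglot
#     from sqlglot import exp
#
#     table = sqlglot.parse_one("SELECT * FROM some_catalog.some_db.tbl").find(exp.Table)
#     (table.catalog, table.db, table.name)
#     # roughly ("some_catalog", "some_db", "tbl")
# ---------------------------------------------------------------------------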
if catalog: 4028 # This allows nesting the table in arbitrarily many dot expressions if needed 4029 table = self.expression( 4030 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4031 ) 4032 else: 4033 catalog = db 4034 db = table 4035 # "" used for tsql FROM a..b case 4036 table = self._parse_table_part(schema=schema) or "" 4037 4038 if ( 4039 wildcard 4040 and self._is_connected() 4041 and (isinstance(table, exp.Identifier) or not table) 4042 and self._match(TokenType.STAR) 4043 ): 4044 if isinstance(table, exp.Identifier): 4045 table.args["this"] += "*" 4046 else: 4047 table = exp.Identifier(this="*") 4048 4049 # We bubble up comments from the Identifier to the Table 4050 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4051 4052 if is_db_reference: 4053 catalog = db 4054 db = table 4055 table = None 4056 4057 if not table and not is_db_reference: 4058 self.raise_error(f"Expected table name but got {self._curr}") 4059 if not db and is_db_reference: 4060 self.raise_error(f"Expected database name but got {self._curr}") 4061 4062 table = self.expression( 4063 exp.Table, 4064 comments=comments, 4065 this=table, 4066 db=db, 4067 catalog=catalog, 4068 ) 4069 4070 changes = self._parse_changes() 4071 if changes: 4072 table.set("changes", changes) 4073 4074 at_before = self._parse_historical_data() 4075 if at_before: 4076 table.set("when", at_before) 4077 4078 pivots = self._parse_pivots() 4079 if pivots: 4080 table.set("pivots", pivots) 4081 4082 return table 4083 4084 def _parse_table( 4085 self, 4086 schema: bool = False, 4087 joins: bool = False, 4088 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4089 parse_bracket: bool = False, 4090 is_db_reference: bool = False, 4091 parse_partition: bool = False, 4092 consume_pipe: bool = False, 4093 ) -> t.Optional[exp.Expression]: 4094 lateral = self._parse_lateral() 4095 if lateral: 4096 return lateral 4097 4098 unnest = self._parse_unnest() 4099 if unnest: 4100 return unnest 4101 4102 values = self._parse_derived_table_values() 4103 if values: 4104 return values 4105 4106 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4107 if subquery: 4108 if not subquery.args.get("pivots"): 4109 subquery.set("pivots", self._parse_pivots()) 4110 return subquery 4111 4112 bracket = parse_bracket and self._parse_bracket(None) 4113 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4114 4115 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4116 self._parse_table 4117 ) 4118 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4119 4120 only = self._match(TokenType.ONLY) 4121 4122 this = t.cast( 4123 exp.Expression, 4124 bracket 4125 or rows_from 4126 or self._parse_bracket( 4127 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4128 ), 4129 ) 4130 4131 if only: 4132 this.set("only", only) 4133 4134 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4135 self._match_text_seq("*") 4136 4137 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4138 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4139 this.set("partition", self._parse_partition()) 4140 4141 if schema: 4142 return self._parse_schema(this=this) 4143 4144 version = self._parse_version() 4145 4146 if version: 4147 this.set("version", version) 4148 4149 if self.dialect.ALIAS_POST_TABLESAMPLE: 4150 this.set("sample", self._parse_table_sample()) 4151 
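# ---------------------------------------------------------------------------
# Illustrative annotation, not part of the original module: a small, hedged
# example of the table alias handling that follows, assuming only the public
# sqlglot.parse_one helper, Expression.find and the exp.Table alias property.
#
#     import sqlglot
#     from sqlglot import exp
#
#     table = sqlglot.parse_one("SELECT * FROM tbl AS t").find(exp.Table)
#     table.alias              # "t", taken from the TableAlias parsed below
#     table.args.get("alias")  # the exp.TableAlias node itself
# ---------------------------------------------------------------------------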
4152 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4153 if alias: 4154 this.set("alias", alias) 4155 4156 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4157 return self.expression( 4158 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4159 ) 4160 4161 this.set("hints", self._parse_table_hints()) 4162 4163 if not this.args.get("pivots"): 4164 this.set("pivots", self._parse_pivots()) 4165 4166 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4167 this.set("sample", self._parse_table_sample()) 4168 4169 if joins: 4170 for join in self._parse_joins(): 4171 this.append("joins", join) 4172 4173 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4174 this.set("ordinality", True) 4175 this.set("alias", self._parse_table_alias()) 4176 4177 return this 4178 4179 def _parse_version(self) -> t.Optional[exp.Version]: 4180 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4181 this = "TIMESTAMP" 4182 elif self._match(TokenType.VERSION_SNAPSHOT): 4183 this = "VERSION" 4184 else: 4185 return None 4186 4187 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4188 kind = self._prev.text.upper() 4189 start = self._parse_bitwise() 4190 self._match_texts(("TO", "AND")) 4191 end = self._parse_bitwise() 4192 expression: t.Optional[exp.Expression] = self.expression( 4193 exp.Tuple, expressions=[start, end] 4194 ) 4195 elif self._match_text_seq("CONTAINED", "IN"): 4196 kind = "CONTAINED IN" 4197 expression = self.expression( 4198 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4199 ) 4200 elif self._match(TokenType.ALL): 4201 kind = "ALL" 4202 expression = None 4203 else: 4204 self._match_text_seq("AS", "OF") 4205 kind = "AS OF" 4206 expression = self._parse_type() 4207 4208 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4209 4210 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4211 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4212 index = self._index 4213 historical_data = None 4214 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4215 this = self._prev.text.upper() 4216 kind = ( 4217 self._match(TokenType.L_PAREN) 4218 and self._match_texts(self.HISTORICAL_DATA_KIND) 4219 and self._prev.text.upper() 4220 ) 4221 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4222 4223 if expression: 4224 self._match_r_paren() 4225 historical_data = self.expression( 4226 exp.HistoricalData, this=this, kind=kind, expression=expression 4227 ) 4228 else: 4229 self._retreat(index) 4230 4231 return historical_data 4232 4233 def _parse_changes(self) -> t.Optional[exp.Changes]: 4234 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4235 return None 4236 4237 information = self._parse_var(any_token=True) 4238 self._match_r_paren() 4239 4240 return self.expression( 4241 exp.Changes, 4242 information=information, 4243 at_before=self._parse_historical_data(), 4244 end=self._parse_historical_data(), 4245 ) 4246 4247 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4248 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4249 return None 4250 4251 self._advance() 4252 4253 expressions = self._parse_wrapped_csv(self._parse_equality) 4254 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4255 4256 alias = self._parse_table_alias() if with_alias else None 4257 4258 if alias: 4259 if self.dialect.UNNEST_COLUMN_ONLY: 4260 if alias.args.get("columns"): 4261 
self.raise_error("Unexpected extra column alias in unnest.") 4262 4263 alias.set("columns", [alias.this]) 4264 alias.set("this", None) 4265 4266 columns = alias.args.get("columns") or [] 4267 if offset and len(expressions) < len(columns): 4268 offset = columns.pop() 4269 4270 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4271 self._match(TokenType.ALIAS) 4272 offset = self._parse_id_var( 4273 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4274 ) or exp.to_identifier("offset") 4275 4276 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4277 4278 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4279 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4280 if not is_derived and not ( 4281 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4282 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4283 ): 4284 return None 4285 4286 expressions = self._parse_csv(self._parse_value) 4287 alias = self._parse_table_alias() 4288 4289 if is_derived: 4290 self._match_r_paren() 4291 4292 return self.expression( 4293 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4294 ) 4295 4296 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4297 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4298 as_modifier and self._match_text_seq("USING", "SAMPLE") 4299 ): 4300 return None 4301 4302 bucket_numerator = None 4303 bucket_denominator = None 4304 bucket_field = None 4305 percent = None 4306 size = None 4307 seed = None 4308 4309 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4310 matched_l_paren = self._match(TokenType.L_PAREN) 4311 4312 if self.TABLESAMPLE_CSV: 4313 num = None 4314 expressions = self._parse_csv(self._parse_primary) 4315 else: 4316 expressions = None 4317 num = ( 4318 self._parse_factor() 4319 if self._match(TokenType.NUMBER, advance=False) 4320 else self._parse_primary() or self._parse_placeholder() 4321 ) 4322 4323 if self._match_text_seq("BUCKET"): 4324 bucket_numerator = self._parse_number() 4325 self._match_text_seq("OUT", "OF") 4326 bucket_denominator = bucket_denominator = self._parse_number() 4327 self._match(TokenType.ON) 4328 bucket_field = self._parse_field() 4329 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4330 percent = num 4331 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4332 size = num 4333 else: 4334 percent = num 4335 4336 if matched_l_paren: 4337 self._match_r_paren() 4338 4339 if self._match(TokenType.L_PAREN): 4340 method = self._parse_var(upper=True) 4341 seed = self._match(TokenType.COMMA) and self._parse_number() 4342 self._match_r_paren() 4343 elif self._match_texts(("SEED", "REPEATABLE")): 4344 seed = self._parse_wrapped(self._parse_number) 4345 4346 if not method and self.DEFAULT_SAMPLING_METHOD: 4347 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4348 4349 return self.expression( 4350 exp.TableSample, 4351 expressions=expressions, 4352 method=method, 4353 bucket_numerator=bucket_numerator, 4354 bucket_denominator=bucket_denominator, 4355 bucket_field=bucket_field, 4356 percent=percent, 4357 size=size, 4358 seed=seed, 4359 ) 4360 4361 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4362 return list(iter(self._parse_pivot, None)) or None 4363 4364 def _parse_joins(self) -> t.Iterator[exp.Join]: 4365 return iter(self._parse_join, None) 4366 4367 def _parse_unpivot_columns(self) -> 
t.Optional[exp.UnpivotColumns]: 4368 if not self._match(TokenType.INTO): 4369 return None 4370 4371 return self.expression( 4372 exp.UnpivotColumns, 4373 this=self._match_text_seq("NAME") and self._parse_column(), 4374 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4375 ) 4376 4377 # https://duckdb.org/docs/sql/statements/pivot 4378 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4379 def _parse_on() -> t.Optional[exp.Expression]: 4380 this = self._parse_bitwise() 4381 4382 if self._match(TokenType.IN): 4383 # PIVOT ... ON col IN (row_val1, row_val2) 4384 return self._parse_in(this) 4385 if self._match(TokenType.ALIAS, advance=False): 4386 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4387 return self._parse_alias(this) 4388 4389 return this 4390 4391 this = self._parse_table() 4392 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4393 into = self._parse_unpivot_columns() 4394 using = self._match(TokenType.USING) and self._parse_csv( 4395 lambda: self._parse_alias(self._parse_function()) 4396 ) 4397 group = self._parse_group() 4398 4399 return self.expression( 4400 exp.Pivot, 4401 this=this, 4402 expressions=expressions, 4403 using=using, 4404 group=group, 4405 unpivot=is_unpivot, 4406 into=into, 4407 ) 4408 4409 def _parse_pivot_in(self) -> exp.In: 4410 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4411 this = self._parse_select_or_expression() 4412 4413 self._match(TokenType.ALIAS) 4414 alias = self._parse_bitwise() 4415 if alias: 4416 if isinstance(alias, exp.Column) and not alias.db: 4417 alias = alias.this 4418 return self.expression(exp.PivotAlias, this=this, alias=alias) 4419 4420 return this 4421 4422 value = self._parse_column() 4423 4424 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4425 self.raise_error("Expecting IN (") 4426 4427 if self._match(TokenType.ANY): 4428 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4429 else: 4430 exprs = self._parse_csv(_parse_aliased_expression) 4431 4432 self._match_r_paren() 4433 return self.expression(exp.In, this=value, expressions=exprs) 4434 4435 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4436 func = self._parse_function() 4437 if not func: 4438 if self._prev and self._prev.token_type == TokenType.COMMA: 4439 return None 4440 self.raise_error("Expecting an aggregation function in PIVOT") 4441 4442 return self._parse_alias(func) 4443 4444 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4445 index = self._index 4446 include_nulls = None 4447 4448 if self._match(TokenType.PIVOT): 4449 unpivot = False 4450 elif self._match(TokenType.UNPIVOT): 4451 unpivot = True 4452 4453 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4454 if self._match_text_seq("INCLUDE", "NULLS"): 4455 include_nulls = True 4456 elif self._match_text_seq("EXCLUDE", "NULLS"): 4457 include_nulls = False 4458 else: 4459 return None 4460 4461 expressions = [] 4462 4463 if not self._match(TokenType.L_PAREN): 4464 self._retreat(index) 4465 return None 4466 4467 if unpivot: 4468 expressions = self._parse_csv(self._parse_column) 4469 else: 4470 expressions = self._parse_csv(self._parse_pivot_aggregation) 4471 4472 if not expressions: 4473 self.raise_error("Failed to parse PIVOT's aggregation list") 4474 4475 if not self._match(TokenType.FOR): 4476 self.raise_error("Expecting FOR") 4477 4478 fields = [] 4479 while True: 4480 field = 
self._try_parse(self._parse_pivot_in) 4481 if not field: 4482 break 4483 fields.append(field) 4484 4485 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4486 self._parse_bitwise 4487 ) 4488 4489 group = self._parse_group() 4490 4491 self._match_r_paren() 4492 4493 pivot = self.expression( 4494 exp.Pivot, 4495 expressions=expressions, 4496 fields=fields, 4497 unpivot=unpivot, 4498 include_nulls=include_nulls, 4499 default_on_null=default_on_null, 4500 group=group, 4501 ) 4502 4503 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4504 pivot.set("alias", self._parse_table_alias()) 4505 4506 if not unpivot: 4507 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4508 4509 columns: t.List[exp.Expression] = [] 4510 all_fields = [] 4511 for pivot_field in pivot.fields: 4512 pivot_field_expressions = pivot_field.expressions 4513 4514 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4515 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4516 continue 4517 4518 all_fields.append( 4519 [ 4520 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4521 for fld in pivot_field_expressions 4522 ] 4523 ) 4524 4525 if all_fields: 4526 if names: 4527 all_fields.append(names) 4528 4529 # Generate all possible combinations of the pivot columns 4530 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4531 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4532 for fld_parts_tuple in itertools.product(*all_fields): 4533 fld_parts = list(fld_parts_tuple) 4534 4535 if names and self.PREFIXED_PIVOT_COLUMNS: 4536 # Move the "name" to the front of the list 4537 fld_parts.insert(0, fld_parts.pop(-1)) 4538 4539 columns.append(exp.to_identifier("_".join(fld_parts))) 4540 4541 pivot.set("columns", columns) 4542 4543 return pivot 4544 4545 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4546 return [agg.alias for agg in aggregations if agg.alias] 4547 4548 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4549 if not skip_where_token and not self._match(TokenType.PREWHERE): 4550 return None 4551 4552 return self.expression( 4553 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4554 ) 4555 4556 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4557 if not skip_where_token and not self._match(TokenType.WHERE): 4558 return None 4559 4560 return self.expression( 4561 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4562 ) 4563 4564 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4565 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4566 return None 4567 comments = self._prev_comments 4568 4569 elements: t.Dict[str, t.Any] = defaultdict(list) 4570 4571 if self._match(TokenType.ALL): 4572 elements["all"] = True 4573 elif self._match(TokenType.DISTINCT): 4574 elements["all"] = False 4575 4576 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4577 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4578 4579 while True: 4580 index = self._index 4581 4582 elements["expressions"].extend( 4583 self._parse_csv( 4584 lambda: None 4585 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4586 else self._parse_assignment() 4587 ) 4588 ) 4589 4590 
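# ---------------------------------------------------------------------------
# Illustrative annotation, not part of the original module: a hedged example of
# the GROUP BY modifier handling below (WITH ROLLUP / CUBE / GROUPING SETS),
# assuming sqlglot's public parse_one helper and Expression.sql.
#
#     import sqlglot
#
#     query = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY a WITH ROLLUP", read="mysql")
#     group = query.args["group"]   # exp.Group
#     group.args.get("rollup")      # populated by the ROLLUP branch below
#     query.sql(dialect="mysql")    # round-trips roughly unchanged
# ---------------------------------------------------------------------------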
before_with_index = self._index 4591 with_prefix = self._match(TokenType.WITH) 4592 4593 if self._match(TokenType.ROLLUP): 4594 elements["rollup"].append( 4595 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4596 ) 4597 elif self._match(TokenType.CUBE): 4598 elements["cube"].append( 4599 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4600 ) 4601 elif self._match(TokenType.GROUPING_SETS): 4602 elements["grouping_sets"].append( 4603 self.expression( 4604 exp.GroupingSets, 4605 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4606 ) 4607 ) 4608 elif self._match_text_seq("TOTALS"): 4609 elements["totals"] = True # type: ignore 4610 4611 if before_with_index <= self._index <= before_with_index + 1: 4612 self._retreat(before_with_index) 4613 break 4614 4615 if index == self._index: 4616 break 4617 4618 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4619 4620 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4621 return self.expression( 4622 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4623 ) 4624 4625 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4626 if self._match(TokenType.L_PAREN): 4627 grouping_set = self._parse_csv(self._parse_bitwise) 4628 self._match_r_paren() 4629 return self.expression(exp.Tuple, expressions=grouping_set) 4630 4631 return self._parse_column() 4632 4633 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4634 if not skip_having_token and not self._match(TokenType.HAVING): 4635 return None 4636 return self.expression( 4637 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4638 ) 4639 4640 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4641 if not self._match(TokenType.QUALIFY): 4642 return None 4643 return self.expression(exp.Qualify, this=self._parse_assignment()) 4644 4645 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4646 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4647 exp.Prior, this=self._parse_bitwise() 4648 ) 4649 connect = self._parse_assignment() 4650 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4651 return connect 4652 4653 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4654 if skip_start_token: 4655 start = None 4656 elif self._match(TokenType.START_WITH): 4657 start = self._parse_assignment() 4658 else: 4659 return None 4660 4661 self._match(TokenType.CONNECT_BY) 4662 nocycle = self._match_text_seq("NOCYCLE") 4663 connect = self._parse_connect_with_prior() 4664 4665 if not start and self._match(TokenType.START_WITH): 4666 start = self._parse_assignment() 4667 4668 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4669 4670 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4671 this = self._parse_id_var(any_token=True) 4672 if self._match(TokenType.ALIAS): 4673 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4674 return this 4675 4676 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4677 if self._match_text_seq("INTERPOLATE"): 4678 return self._parse_wrapped_csv(self._parse_name_as_expression) 4679 return None 4680 4681 def _parse_order( 4682 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4683 ) -> t.Optional[exp.Expression]: 4684 siblings = None 4685 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4686 if not 
self._match(TokenType.ORDER_SIBLINGS_BY): 4687 return this 4688 4689 siblings = True 4690 4691 return self.expression( 4692 exp.Order, 4693 comments=self._prev_comments, 4694 this=this, 4695 expressions=self._parse_csv(self._parse_ordered), 4696 siblings=siblings, 4697 ) 4698 4699 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4700 if not self._match(token): 4701 return None 4702 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4703 4704 def _parse_ordered( 4705 self, parse_method: t.Optional[t.Callable] = None 4706 ) -> t.Optional[exp.Ordered]: 4707 this = parse_method() if parse_method else self._parse_assignment() 4708 if not this: 4709 return None 4710 4711 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4712 this = exp.var("ALL") 4713 4714 asc = self._match(TokenType.ASC) 4715 desc = self._match(TokenType.DESC) or (asc and False) 4716 4717 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4718 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4719 4720 nulls_first = is_nulls_first or False 4721 explicitly_null_ordered = is_nulls_first or is_nulls_last 4722 4723 if ( 4724 not explicitly_null_ordered 4725 and ( 4726 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4727 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4728 ) 4729 and self.dialect.NULL_ORDERING != "nulls_are_last" 4730 ): 4731 nulls_first = True 4732 4733 if self._match_text_seq("WITH", "FILL"): 4734 with_fill = self.expression( 4735 exp.WithFill, 4736 **{ # type: ignore 4737 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4738 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4739 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4740 "interpolate": self._parse_interpolate(), 4741 }, 4742 ) 4743 else: 4744 with_fill = None 4745 4746 return self.expression( 4747 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4748 ) 4749 4750 def _parse_limit_options(self) -> exp.LimitOptions: 4751 percent = self._match(TokenType.PERCENT) 4752 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4753 self._match_text_seq("ONLY") 4754 with_ties = self._match_text_seq("WITH", "TIES") 4755 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4756 4757 def _parse_limit( 4758 self, 4759 this: t.Optional[exp.Expression] = None, 4760 top: bool = False, 4761 skip_limit_token: bool = False, 4762 ) -> t.Optional[exp.Expression]: 4763 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4764 comments = self._prev_comments 4765 if top: 4766 limit_paren = self._match(TokenType.L_PAREN) 4767 expression = self._parse_term() if limit_paren else self._parse_number() 4768 4769 if limit_paren: 4770 self._match_r_paren() 4771 4772 limit_options = self._parse_limit_options() 4773 else: 4774 limit_options = None 4775 expression = self._parse_term() 4776 4777 if self._match(TokenType.COMMA): 4778 offset = expression 4779 expression = self._parse_term() 4780 else: 4781 offset = None 4782 4783 limit_exp = self.expression( 4784 exp.Limit, 4785 this=this, 4786 expression=expression, 4787 offset=offset, 4788 comments=comments, 4789 limit_options=limit_options, 4790 expressions=self._parse_limit_by(), 4791 ) 4792 4793 return limit_exp 4794 4795 if self._match(TokenType.FETCH): 4796 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4797 direction = self._prev.text.upper() if direction else 
"FIRST" 4798 4799 count = self._parse_field(tokens=self.FETCH_TOKENS) 4800 4801 return self.expression( 4802 exp.Fetch, 4803 direction=direction, 4804 count=count, 4805 limit_options=self._parse_limit_options(), 4806 ) 4807 4808 return this 4809 4810 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4811 if not self._match(TokenType.OFFSET): 4812 return this 4813 4814 count = self._parse_term() 4815 self._match_set((TokenType.ROW, TokenType.ROWS)) 4816 4817 return self.expression( 4818 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4819 ) 4820 4821 def _can_parse_limit_or_offset(self) -> bool: 4822 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4823 return False 4824 4825 index = self._index 4826 result = bool( 4827 self._try_parse(self._parse_limit, retreat=True) 4828 or self._try_parse(self._parse_offset, retreat=True) 4829 ) 4830 self._retreat(index) 4831 return result 4832 4833 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4834 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4835 4836 def _parse_locks(self) -> t.List[exp.Lock]: 4837 locks = [] 4838 while True: 4839 update, key = None, None 4840 if self._match_text_seq("FOR", "UPDATE"): 4841 update = True 4842 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4843 "LOCK", "IN", "SHARE", "MODE" 4844 ): 4845 update = False 4846 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4847 update, key = False, True 4848 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4849 update, key = True, True 4850 else: 4851 break 4852 4853 expressions = None 4854 if self._match_text_seq("OF"): 4855 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4856 4857 wait: t.Optional[bool | exp.Expression] = None 4858 if self._match_text_seq("NOWAIT"): 4859 wait = True 4860 elif self._match_text_seq("WAIT"): 4861 wait = self._parse_primary() 4862 elif self._match_text_seq("SKIP", "LOCKED"): 4863 wait = False 4864 4865 locks.append( 4866 self.expression( 4867 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4868 ) 4869 ) 4870 4871 return locks 4872 4873 def parse_set_operation( 4874 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4875 ) -> t.Optional[exp.Expression]: 4876 start = self._index 4877 _, side_token, kind_token = self._parse_join_parts() 4878 4879 side = side_token.text if side_token else None 4880 kind = kind_token.text if kind_token else None 4881 4882 if not self._match_set(self.SET_OPERATIONS): 4883 self._retreat(start) 4884 return None 4885 4886 token_type = self._prev.token_type 4887 4888 if token_type == TokenType.UNION: 4889 operation: t.Type[exp.SetOperation] = exp.Union 4890 elif token_type == TokenType.EXCEPT: 4891 operation = exp.Except 4892 else: 4893 operation = exp.Intersect 4894 4895 comments = self._prev.comments 4896 4897 if self._match(TokenType.DISTINCT): 4898 distinct: t.Optional[bool] = True 4899 elif self._match(TokenType.ALL): 4900 distinct = False 4901 else: 4902 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4903 if distinct is None: 4904 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4905 4906 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4907 "STRICT", "CORRESPONDING" 4908 ) 4909 if self._match_text_seq("CORRESPONDING"): 4910 by_name = True 4911 if not side and not kind: 4912 kind = "INNER" 4913 4914 on_column_list = None 4915 if by_name and 
self._match_texts(("ON", "BY")): 4916 on_column_list = self._parse_wrapped_csv(self._parse_column) 4917 4918 expression = self._parse_select( 4919 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4920 ) 4921 4922 return self.expression( 4923 operation, 4924 comments=comments, 4925 this=this, 4926 distinct=distinct, 4927 by_name=by_name, 4928 expression=expression, 4929 side=side, 4930 kind=kind, 4931 on=on_column_list, 4932 ) 4933 4934 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4935 while this: 4936 setop = self.parse_set_operation(this) 4937 if not setop: 4938 break 4939 this = setop 4940 4941 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4942 expression = this.expression 4943 4944 if expression: 4945 for arg in self.SET_OP_MODIFIERS: 4946 expr = expression.args.get(arg) 4947 if expr: 4948 this.set(arg, expr.pop()) 4949 4950 return this 4951 4952 def _parse_expression(self) -> t.Optional[exp.Expression]: 4953 return self._parse_alias(self._parse_assignment()) 4954 4955 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4956 this = self._parse_disjunction() 4957 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4958 # This allows us to parse <non-identifier token> := <expr> 4959 this = exp.column( 4960 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4961 ) 4962 4963 while self._match_set(self.ASSIGNMENT): 4964 if isinstance(this, exp.Column) and len(this.parts) == 1: 4965 this = this.this 4966 4967 this = self.expression( 4968 self.ASSIGNMENT[self._prev.token_type], 4969 this=this, 4970 comments=self._prev_comments, 4971 expression=self._parse_assignment(), 4972 ) 4973 4974 return this 4975 4976 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4977 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4978 4979 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4980 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4981 4982 def _parse_equality(self) -> t.Optional[exp.Expression]: 4983 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4984 4985 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4986 return self._parse_tokens(self._parse_range, self.COMPARISON) 4987 4988 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4989 this = this or self._parse_bitwise() 4990 negate = self._match(TokenType.NOT) 4991 4992 if self._match_set(self.RANGE_PARSERS): 4993 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4994 if not expression: 4995 return this 4996 4997 this = expression 4998 elif self._match(TokenType.ISNULL): 4999 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5000 5001 # Postgres supports ISNULL and NOTNULL for conditions. 
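# ---------------------------------------------------------------------------
# Illustrative annotation, not part of the original module: a hedged sketch of
# how the ISNULL / NOTNULL shorthands handled here surface through the public
# API, assuming sqlglot.parse_one and Expression.sql.
#
#     import sqlglot
#
#     sqlglot.parse_one("SELECT x ISNULL", read="postgres").sql()
#     # roughly "SELECT x IS NULL"
#     sqlglot.parse_one("SELECT x NOTNULL", read="postgres").sql()
#     # roughly "SELECT NOT x IS NULL"
# ---------------------------------------------------------------------------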
5002 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5003 if self._match(TokenType.NOTNULL): 5004 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5005 this = self.expression(exp.Not, this=this) 5006 5007 if negate: 5008 this = self._negate_range(this) 5009 5010 if self._match(TokenType.IS): 5011 this = self._parse_is(this) 5012 5013 return this 5014 5015 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5016 if not this: 5017 return this 5018 5019 return self.expression(exp.Not, this=this) 5020 5021 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5022 index = self._index - 1 5023 negate = self._match(TokenType.NOT) 5024 5025 if self._match_text_seq("DISTINCT", "FROM"): 5026 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5027 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5028 5029 if self._match(TokenType.JSON): 5030 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5031 5032 if self._match_text_seq("WITH"): 5033 _with = True 5034 elif self._match_text_seq("WITHOUT"): 5035 _with = False 5036 else: 5037 _with = None 5038 5039 unique = self._match(TokenType.UNIQUE) 5040 self._match_text_seq("KEYS") 5041 expression: t.Optional[exp.Expression] = self.expression( 5042 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5043 ) 5044 else: 5045 expression = self._parse_primary() or self._parse_null() 5046 if not expression: 5047 self._retreat(index) 5048 return None 5049 5050 this = self.expression(exp.Is, this=this, expression=expression) 5051 return self.expression(exp.Not, this=this) if negate else this 5052 5053 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5054 unnest = self._parse_unnest(with_alias=False) 5055 if unnest: 5056 this = self.expression(exp.In, this=this, unnest=unnest) 5057 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5058 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5059 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5060 5061 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5062 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5063 else: 5064 this = self.expression(exp.In, this=this, expressions=expressions) 5065 5066 if matched_l_paren: 5067 self._match_r_paren(this) 5068 elif not self._match(TokenType.R_BRACKET, expression=this): 5069 self.raise_error("Expecting ]") 5070 else: 5071 this = self.expression(exp.In, this=this, field=self._parse_column()) 5072 5073 return this 5074 5075 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5076 symmetric = None 5077 if self._match_text_seq("SYMMETRIC"): 5078 symmetric = True 5079 elif self._match_text_seq("ASYMMETRIC"): 5080 symmetric = False 5081 5082 low = self._parse_bitwise() 5083 self._match(TokenType.AND) 5084 high = self._parse_bitwise() 5085 5086 return self.expression( 5087 exp.Between, 5088 this=this, 5089 low=low, 5090 high=high, 5091 symmetric=symmetric, 5092 ) 5093 5094 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5095 if not self._match(TokenType.ESCAPE): 5096 return this 5097 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5098 5099 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5100 index = self._index 5101 5102 if not 
self._match(TokenType.INTERVAL) and match_interval: 5103 return None 5104 5105 if self._match(TokenType.STRING, advance=False): 5106 this = self._parse_primary() 5107 else: 5108 this = self._parse_term() 5109 5110 if not this or ( 5111 isinstance(this, exp.Column) 5112 and not this.table 5113 and not this.this.quoted 5114 and self._curr 5115 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5116 ): 5117 self._retreat(index) 5118 return None 5119 5120 # handle day-time format interval span with omitted units: 5121 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5122 interval_span_units_omitted = None 5123 if ( 5124 this 5125 and this.is_string 5126 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5127 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5128 ): 5129 index = self._index 5130 5131 # Var "TO" Var 5132 first_unit = self._parse_var(any_token=True, upper=True) 5133 second_unit = None 5134 if first_unit and self._match_text_seq("TO"): 5135 second_unit = self._parse_var(any_token=True, upper=True) 5136 5137 interval_span_units_omitted = not (first_unit and second_unit) 5138 5139 self._retreat(index) 5140 5141 unit = ( 5142 None 5143 if interval_span_units_omitted 5144 else ( 5145 self._parse_function() 5146 or ( 5147 not self._match(TokenType.ALIAS, advance=False) 5148 and self._parse_var(any_token=True, upper=True) 5149 ) 5150 ) 5151 ) 5152 5153 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5154 # each INTERVAL expression into this canonical form so it's easy to transpile 5155 if this and this.is_number: 5156 this = exp.Literal.string(this.to_py()) 5157 elif this and this.is_string: 5158 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5159 if parts and unit: 5160 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5161 unit = None 5162 self._retreat(self._index - 1) 5163 5164 if len(parts) == 1: 5165 this = exp.Literal.string(parts[0][0]) 5166 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5167 5168 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5169 unit = self.expression( 5170 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5171 ) 5172 5173 interval = self.expression(exp.Interval, this=this, unit=unit) 5174 5175 index = self._index 5176 self._match(TokenType.PLUS) 5177 5178 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5179 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5180 return self.expression( 5181 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5182 ) 5183 5184 self._retreat(index) 5185 return interval 5186 5187 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5188 this = self._parse_term() 5189 5190 while True: 5191 if self._match_set(self.BITWISE): 5192 this = self.expression( 5193 self.BITWISE[self._prev.token_type], 5194 this=this, 5195 expression=self._parse_term(), 5196 ) 5197 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5198 this = self.expression( 5199 exp.DPipe, 5200 this=this, 5201 expression=self._parse_term(), 5202 safe=not self.dialect.STRICT_STRING_CONCAT, 5203 ) 5204 elif self._match(TokenType.DQMARK): 5205 this = self.expression( 5206 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5207 ) 5208 elif self._match_pair(TokenType.LT, TokenType.LT): 5209 this = self.expression( 5210 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5211 ) 5212 elif self._match_pair(TokenType.GT, TokenType.GT): 5213 this = self.expression( 5214 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5215 ) 5216 else: 5217 break 5218 5219 return this 5220 5221 def _parse_term(self) -> t.Optional[exp.Expression]: 5222 this = self._parse_factor() 5223 5224 while self._match_set(self.TERM): 5225 klass = self.TERM[self._prev.token_type] 5226 comments = self._prev_comments 5227 expression = self._parse_factor() 5228 5229 this = self.expression(klass, this=this, comments=comments, expression=expression) 5230 5231 if isinstance(this, exp.Collate): 5232 expr = this.expression 5233 5234 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5235 # fallback to Identifier / Var 5236 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5237 ident = expr.this 5238 if isinstance(ident, exp.Identifier): 5239 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5240 5241 return this 5242 5243 def _parse_factor(self) -> t.Optional[exp.Expression]: 5244 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5245 this = parse_method() 5246 5247 while self._match_set(self.FACTOR): 5248 klass = self.FACTOR[self._prev.token_type] 5249 comments = self._prev_comments 5250 expression = parse_method() 5251 5252 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5253 self._retreat(self._index - 1) 5254 return this 5255 5256 this = self.expression(klass, this=this, comments=comments, expression=expression) 5257 5258 if isinstance(this, exp.Div): 5259 this.args["typed"] = self.dialect.TYPED_DIVISION 5260 this.args["safe"] = self.dialect.SAFE_DIVISION 5261 5262 return this 5263 5264 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5265 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5266 5267 def _parse_unary(self) -> t.Optional[exp.Expression]: 5268 if self._match_set(self.UNARY_PARSERS): 5269 return self.UNARY_PARSERS[self._prev.token_type](self) 5270 return self._parse_at_time_zone(self._parse_type()) 5271 5272 def _parse_type( 5273 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5274 ) -> t.Optional[exp.Expression]: 5275 interval = parse_interval and self._parse_interval() 5276 if interval: 5277 return interval 5278 5279 index = self._index 5280 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5281 
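# Illustrative note (added comment, not part of the original source): when a data type is
# parsed at this point, the branches below decide what to do with it. A typed literal such as
# DATE '2020-01-01' is generally canonicalized into CAST('2020-01-01' AS DATE), whereas a plain
# reference like foo.bar leaves data_type as None and falls through to _parse_column at the end
# of this method.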
5282 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5283 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5284 if isinstance(data_type, exp.Cast): 5285 # This constructor can contain ops directly after it, for instance struct unnesting: 5286 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5287 return self._parse_column_ops(data_type) 5288 5289 if data_type: 5290 index2 = self._index 5291 this = self._parse_primary() 5292 5293 if isinstance(this, exp.Literal): 5294 literal = this.name 5295 this = self._parse_column_ops(this) 5296 5297 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5298 if parser: 5299 return parser(self, this, data_type) 5300 5301 if ( 5302 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5303 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5304 and TIME_ZONE_RE.search(literal) 5305 ): 5306 data_type = exp.DataType.build("TIMESTAMPTZ") 5307 5308 return self.expression(exp.Cast, this=this, to=data_type) 5309 5310 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5311 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5312 # 5313 # If the index difference here is greater than 1, that means the parser itself must have 5314 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5315 # 5316 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5317 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5318 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5319 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5320 # 5321 # In these cases, we don't really want to return the converted type, but instead retreat 5322 # and try to parse a Column or Identifier in the section below.
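# Worked example (added comment, not part of the original source), following the DECIMAL case
# described above: with "DECIMAL(38, 0)" spelled out in the SQL, _parse_types consumes six
# tokens, so index2 - index > 1 and the parsed DataType is kept. With a bare "DECIMAL" that a
# TYPE_CONVERTERS callable expands to DECIMAL(38, 0), only the type keyword was consumed
# (index2 - index == 1), so we retreat and re-parse it as a Column / Identifier instead.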
5323 if data_type.expressions and index2 - index > 1: 5324 self._retreat(index2) 5325 return self._parse_column_ops(data_type) 5326 5327 self._retreat(index) 5328 5329 if fallback_to_identifier: 5330 return self._parse_id_var() 5331 5332 this = self._parse_column() 5333 return this and self._parse_column_ops(this) 5334 5335 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5336 this = self._parse_type() 5337 if not this: 5338 return None 5339 5340 if isinstance(this, exp.Column) and not this.table: 5341 this = exp.var(this.name.upper()) 5342 5343 return self.expression( 5344 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5345 ) 5346 5347 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5348 type_name = identifier.name 5349 5350 while self._match(TokenType.DOT): 5351 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5352 5353 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5354 5355 def _parse_types( 5356 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5357 ) -> t.Optional[exp.Expression]: 5358 index = self._index 5359 5360 this: t.Optional[exp.Expression] = None 5361 prefix = self._match_text_seq("SYSUDTLIB", ".") 5362 5363 if self._match_set(self.TYPE_TOKENS): 5364 type_token = self._prev.token_type 5365 else: 5366 type_token = None 5367 identifier = allow_identifiers and self._parse_id_var( 5368 any_token=False, tokens=(TokenType.VAR,) 5369 ) 5370 if isinstance(identifier, exp.Identifier): 5371 try: 5372 tokens = self.dialect.tokenize(identifier.name) 5373 except TokenError: 5374 tokens = None 5375 5376 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5377 type_token = tokens[0].token_type 5378 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5379 this = self._parse_user_defined_type(identifier) 5380 else: 5381 self._retreat(self._index - 1) 5382 return None 5383 else: 5384 return None 5385 5386 if type_token == TokenType.PSEUDO_TYPE: 5387 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5388 5389 if type_token == TokenType.OBJECT_IDENTIFIER: 5390 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5391 5392 # https://materialize.com/docs/sql/types/map/ 5393 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5394 key_type = self._parse_types( 5395 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5396 ) 5397 if not self._match(TokenType.FARROW): 5398 self._retreat(index) 5399 return None 5400 5401 value_type = self._parse_types( 5402 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5403 ) 5404 if not self._match(TokenType.R_BRACKET): 5405 self._retreat(index) 5406 return None 5407 5408 return exp.DataType( 5409 this=exp.DataType.Type.MAP, 5410 expressions=[key_type, value_type], 5411 nested=True, 5412 prefix=prefix, 5413 ) 5414 5415 nested = type_token in self.NESTED_TYPE_TOKENS 5416 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5417 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5418 expressions = None 5419 maybe_func = False 5420 5421 if self._match(TokenType.L_PAREN): 5422 if is_struct: 5423 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5424 elif nested: 5425 expressions = self._parse_csv( 5426 lambda: self._parse_types( 5427 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5428 ) 5429 ) 5430 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5431 this = expressions[0] 5432 this.set("nullable", True) 5433 self._match_r_paren() 5434 return this 5435 elif type_token in self.ENUM_TYPE_TOKENS: 5436 expressions = self._parse_csv(self._parse_equality) 5437 elif is_aggregate: 5438 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5439 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5440 ) 5441 if not func_or_ident: 5442 return None 5443 expressions = [func_or_ident] 5444 if self._match(TokenType.COMMA): 5445 expressions.extend( 5446 self._parse_csv( 5447 lambda: self._parse_types( 5448 check_func=check_func, 5449 schema=schema, 5450 allow_identifiers=allow_identifiers, 5451 ) 5452 ) 5453 ) 5454 else: 5455 expressions = self._parse_csv(self._parse_type_size) 5456 5457 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5458 if type_token == TokenType.VECTOR and len(expressions) == 2: 5459 expressions = self._parse_vector_expressions(expressions) 5460 5461 if not self._match(TokenType.R_PAREN): 5462 self._retreat(index) 5463 return None 5464 5465 maybe_func = True 5466 5467 values: t.Optional[t.List[exp.Expression]] = None 5468 5469 if nested and self._match(TokenType.LT): 5470 if is_struct: 5471 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5472 else: 5473 expressions = self._parse_csv( 5474 lambda: self._parse_types( 5475 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5476 ) 5477 ) 5478 5479 if not self._match(TokenType.GT): 5480 self.raise_error("Expecting >") 5481 5482 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5483 values = self._parse_csv(self._parse_assignment) 5484 if not values and is_struct: 5485 values = None 5486 self._retreat(self._index - 1) 5487 else: 5488 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5489 5490 if type_token in self.TIMESTAMPS: 5491 if self._match_text_seq("WITH", "TIME", "ZONE"): 5492 maybe_func = False 5493 tz_type = ( 5494 exp.DataType.Type.TIMETZ 5495 if type_token in self.TIMES 5496 else exp.DataType.Type.TIMESTAMPTZ 5497 ) 5498 this = exp.DataType(this=tz_type, expressions=expressions) 5499 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5500 maybe_func = False 5501 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5502 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5503 maybe_func = False 5504 elif type_token == TokenType.INTERVAL: 5505 unit = self._parse_var(upper=True) 5506 if unit: 5507 if self._match_text_seq("TO"): 5508 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5509 5510 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5511 else: 5512 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5513 elif type_token == TokenType.VOID: 5514 this = exp.DataType(this=exp.DataType.Type.NULL) 5515 5516 if maybe_func and check_func: 5517 index2 = self._index 5518 peek = self._parse_string() 5519 5520 if not peek: 5521 self._retreat(index) 5522 return None 5523 5524 self._retreat(index2) 5525 5526 if not this: 5527 if self._match_text_seq("UNSIGNED"): 5528 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5529 if not unsigned_type_token: 5530 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5531 5532 type_token = unsigned_type_token or type_token 5533 5534 # NULLABLE without parentheses can be a column (Presto/Trino) 5535 if type_token == 
TokenType.NULLABLE and not expressions: 5536 self._retreat(index) 5537 return None 5538 5539 this = exp.DataType( 5540 this=exp.DataType.Type[type_token.value], 5541 expressions=expressions, 5542 nested=nested, 5543 prefix=prefix, 5544 ) 5545 5546 # Empty arrays/structs are allowed 5547 if values is not None: 5548 cls = exp.Struct if is_struct else exp.Array 5549 this = exp.cast(cls(expressions=values), this, copy=False) 5550 5551 elif expressions: 5552 this.set("expressions", expressions) 5553 5554 # https://materialize.com/docs/sql/types/list/#type-name 5555 while self._match(TokenType.LIST): 5556 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5557 5558 index = self._index 5559 5560 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5561 matched_array = self._match(TokenType.ARRAY) 5562 5563 while self._curr: 5564 datatype_token = self._prev.token_type 5565 matched_l_bracket = self._match(TokenType.L_BRACKET) 5566 5567 if (not matched_l_bracket and not matched_array) or ( 5568 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5569 ): 5570 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5571 # not to be confused with the fixed size array parsing 5572 break 5573 5574 matched_array = False 5575 values = self._parse_csv(self._parse_assignment) or None 5576 if ( 5577 values 5578 and not schema 5579 and ( 5580 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5581 ) 5582 ): 5583 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5584 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5585 self._retreat(index) 5586 break 5587 5588 this = exp.DataType( 5589 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5590 ) 5591 self._match(TokenType.R_BRACKET) 5592 5593 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5594 converter = self.TYPE_CONVERTERS.get(this.this) 5595 if converter: 5596 this = converter(t.cast(exp.DataType, this)) 5597 5598 return this 5599 5600 def _parse_vector_expressions( 5601 self, expressions: t.List[exp.Expression] 5602 ) -> t.List[exp.Expression]: 5603 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5604 5605 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5606 index = self._index 5607 5608 if ( 5609 self._curr 5610 and self._next 5611 and self._curr.token_type in self.TYPE_TOKENS 5612 and self._next.token_type in self.TYPE_TOKENS 5613 ): 5614 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5615 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5616 this = self._parse_id_var() 5617 else: 5618 this = ( 5619 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5620 or self._parse_id_var() 5621 ) 5622 5623 self._match(TokenType.COLON) 5624 5625 if ( 5626 type_required 5627 and not isinstance(this, exp.DataType) 5628 and not self._match_set(self.TYPE_TOKENS, advance=False) 5629 ): 5630 self._retreat(index) 5631 return self._parse_types() 5632 5633 return self._parse_column_def(this) 5634 5635 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5636 if not self._match_text_seq("AT", "TIME", "ZONE"): 5637 return this 5638 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5639 5640 def _parse_column(self) -> t.Optional[exp.Expression]: 5641 this = self._parse_column_reference() 5642 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5643 5644 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5645 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5646 5647 return column 5648 5649 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5650 this = self._parse_field() 5651 if ( 5652 not this 5653 and self._match(TokenType.VALUES, advance=False) 5654 and self.VALUES_FOLLOWED_BY_PAREN 5655 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5656 ): 5657 this = self._parse_id_var() 5658 5659 if isinstance(this, exp.Identifier): 5660 # We bubble up comments from the Identifier to the Column 5661 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5662 5663 return this 5664 5665 def _parse_colon_as_variant_extract( 5666 self, this: t.Optional[exp.Expression] 5667 ) -> t.Optional[exp.Expression]: 5668 casts = [] 5669 json_path = [] 5670 escape = None 5671 5672 while self._match(TokenType.COLON): 5673 start_index = self._index 5674 5675 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5676 path = self._parse_column_ops( 5677 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5678 ) 5679 5680 # The cast :: operator has a lower precedence than the extraction operator :, so 5681 # we rearrange the AST appropriately to avoid casting the JSON path 5682 while isinstance(path, exp.Cast): 5683 casts.append(path.to) 5684 path = path.this 5685 5686 if casts: 5687 dcolon_offset = next( 5688 i 5689 for i, t in enumerate(self._tokens[start_index:]) 5690 if t.token_type == TokenType.DCOLON 5691 ) 5692 end_token = self._tokens[start_index + dcolon_offset - 1] 5693 else: 5694 end_token = self._prev 5695 5696 if path: 5697 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5698 # it'll roundtrip to a string literal in GET_PATH 5699 if isinstance(path, exp.Identifier) and path.quoted: 5700 escape = True 5701 5702 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5703 5704 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5705 # Databricks transforms it back to the colon/dot notation 5706 if json_path: 5707 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5708 5709 if json_path_expr: 5710 json_path_expr.set("escape", escape) 5711 5712 this = self.expression( 5713 exp.JSONExtract, 5714 this=this, 5715 expression=json_path_expr, 5716 variant_extract=True, 5717 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5718 ) 5719 5720 while casts: 5721 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5722 5723 return this 5724 5725 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5726 return self._parse_types() 5727 5728 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5729 this = self._parse_bracket(this) 5730 5731 while self._match_set(self.COLUMN_OPERATORS): 5732 op_token = self._prev.token_type 5733 op = self.COLUMN_OPERATORS.get(op_token) 5734 5735 if op_token in self.CAST_COLUMN_OPERATORS: 5736 field = self._parse_dcolon() 5737 if not field: 5738 self.raise_error("Expected type") 5739 elif op and self._curr: 5740 field = self._parse_column_reference() or self._parse_bitwise() 5741 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5742 field = self._parse_column_ops(field) 5743 else: 5744 field = self._parse_field(any_token=True, anonymous_func=True) 5745 5746 # Function calls can be qualified, e.g., x.y.FOO() 5747 # This converts the final AST to a series of Dots leading to the function call 5748 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5749 if isinstance(field, (exp.Func, exp.Window)) and this: 5750 this = this.transform( 5751 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5752 ) 5753 5754 if op: 5755 this = op(self, this, field) 5756 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5757 this = self.expression( 5758 exp.Column, 5759 comments=this.comments, 5760 this=field, 5761 table=this.this, 5762 db=this.args.get("table"), 5763 catalog=this.args.get("db"), 5764 ) 5765 elif isinstance(field, exp.Window): 5766 # Move the exp.Dot's to the window's function 5767 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5768 field.set("this", window_func) 5769 this = field 5770 else: 5771 this = self.expression(exp.Dot, this=this, expression=field) 5772 5773 if field and field.comments: 5774 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5775 5776 this = self._parse_bracket(this) 5777 5778 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5779 5780 def _parse_paren(self) -> t.Optional[exp.Expression]: 5781 if not self._match(TokenType.L_PAREN): 5782 return None 5783 5784 comments = self._prev_comments 5785 query = self._parse_select() 5786 5787 if query: 5788 expressions = [query] 5789 else: 5790 expressions = self._parse_expressions() 5791 5792 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5793 5794 if not this and self._match(TokenType.R_PAREN, advance=False): 5795 this = self.expression(exp.Tuple) 5796 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5797 this = self._parse_subquery(this=this, parse_alias=False) 5798 elif isinstance(this, exp.Subquery): 5799 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5800 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5801 this = self.expression(exp.Tuple, expressions=expressions) 5802 else: 5803 this = self.expression(exp.Paren, this=this) 5804 5805 if this: 5806 this.add_comments(comments) 5807 5808 self._match_r_paren(expression=this) 5809 5810 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5811 return self._parse_window(this) 5812 5813 return this 5814 5815 def _parse_primary(self) -> t.Optional[exp.Expression]: 5816 if self._match_set(self.PRIMARY_PARSERS): 5817 token_type = self._prev.token_type 5818 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5819 5820 if token_type == TokenType.STRING: 5821 expressions = [primary] 5822 while self._match(TokenType.STRING): 5823 expressions.append(exp.Literal.string(self._prev.text)) 5824 5825 if len(expressions) > 1: 5826 return self.expression(exp.Concat, expressions=expressions) 5827 5828 return primary 5829 5830 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5831 return exp.Literal.number(f"0.{self._prev.text}") 5832 5833 return self._parse_paren() 5834 5835 def _parse_field( 5836 self, 5837 any_token: bool = False, 5838 tokens: t.Optional[t.Collection[TokenType]] = None, 5839 anonymous_func: bool = False, 5840 ) -> t.Optional[exp.Expression]: 5841 if anonymous_func: 5842 field = ( 5843 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5844 or self._parse_primary() 5845 ) 5846 else: 5847 field = self._parse_primary() or self._parse_function( 5848 anonymous=anonymous_func, any_token=any_token 5849 ) 5850 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5851 5852 def _parse_function( 5853 self, 5854 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5855 anonymous: bool = False, 5856 optional_parens: bool = True, 5857 any_token: bool = False, 5858 ) -> t.Optional[exp.Expression]: 5859 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5860 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5861 fn_syntax = False 5862 if ( 5863 self._match(TokenType.L_BRACE, advance=False) 5864 and self._next 5865 and self._next.text.upper() == "FN" 5866 ): 5867 self._advance(2) 5868 fn_syntax = True 5869 5870 func = self._parse_function_call( 5871 functions=functions, 5872 anonymous=anonymous, 5873 optional_parens=optional_parens, 5874 any_token=any_token, 5875 ) 5876 5877 if fn_syntax: 5878 self._match(TokenType.R_BRACE) 5879 5880 return func 5881 5882 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5883 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5884 5885 def _parse_function_call( 5886 self, 5887 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5888 anonymous: bool = False, 5889 optional_parens: bool = True, 5890 any_token: bool = False, 5891 ) -> t.Optional[exp.Expression]: 5892 if not self._curr: 5893 return None 5894 5895 comments = self._curr.comments 5896 prev = self._prev 5897 token = self._curr 5898 token_type = self._curr.token_type 5899 this = self._curr.text 5900 upper = this.upper() 5901 5902 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5903 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5904 self._advance() 5905 return 
self._parse_window(parser(self)) 5906 5907 if not self._next or self._next.token_type != TokenType.L_PAREN: 5908 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5909 self._advance() 5910 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5911 5912 return None 5913 5914 if any_token: 5915 if token_type in self.RESERVED_TOKENS: 5916 return None 5917 elif token_type not in self.FUNC_TOKENS: 5918 return None 5919 5920 self._advance(2) 5921 5922 parser = self.FUNCTION_PARSERS.get(upper) 5923 if parser and not anonymous: 5924 this = parser(self) 5925 else: 5926 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5927 5928 if subquery_predicate: 5929 expr = None 5930 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5931 expr = self._parse_select() 5932 self._match_r_paren() 5933 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5934 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5935 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5936 self._advance(-1) 5937 expr = self._parse_bitwise() 5938 5939 if expr: 5940 return self.expression(subquery_predicate, comments=comments, this=expr) 5941 5942 if functions is None: 5943 functions = self.FUNCTIONS 5944 5945 function = functions.get(upper) 5946 known_function = function and not anonymous 5947 5948 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5949 args = self._parse_function_args(alias) 5950 5951 post_func_comments = self._curr and self._curr.comments 5952 if known_function and post_func_comments: 5953 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5954 # call we'll construct it as exp.Anonymous, even if it's "known" 5955 if any( 5956 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5957 for comment in post_func_comments 5958 ): 5959 known_function = False 5960 5961 if alias and known_function: 5962 args = self._kv_to_prop_eq(args) 5963 5964 if known_function: 5965 func_builder = t.cast(t.Callable, function) 5966 5967 if "dialect" in func_builder.__code__.co_varnames: 5968 func = func_builder(args, dialect=self.dialect) 5969 else: 5970 func = func_builder(args) 5971 5972 func = self.validate_expression(func, args) 5973 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5974 func.meta["name"] = this 5975 5976 this = func 5977 else: 5978 if token_type == TokenType.IDENTIFIER: 5979 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5980 5981 this = self.expression(exp.Anonymous, this=this, expressions=args) 5982 this = this.update_positions(token) 5983 5984 if isinstance(this, exp.Expression): 5985 this.add_comments(comments) 5986 5987 self._match_r_paren(this) 5988 return self._parse_window(this) 5989 5990 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5991 return expression 5992 5993 def _kv_to_prop_eq( 5994 self, expressions: t.List[exp.Expression], parse_map: bool = False 5995 ) -> t.List[exp.Expression]: 5996 transformed = [] 5997 5998 for index, e in enumerate(expressions): 5999 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6000 if isinstance(e, exp.Alias): 6001 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6002 6003 if not isinstance(e, exp.PropertyEQ): 6004 e = self.expression( 6005 exp.PropertyEQ, 6006 this=e.this if parse_map else exp.to_identifier(e.this.name), 6007 expression=e.expression, 6008 ) 6009 6010 if isinstance(e.this, exp.Column): 6011 e.this.replace(e.this.this) 
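# Illustrative note (added comment, not part of the original source): a key-value argument
# parsed as an Alias, e.g. `1 AS a`, is rewritten above into PropertyEQ(this=a, expression=1);
# anything else is passed through the _to_prop_eq hook below, which the base parser leaves as
# a no-op so that dialects can override it.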
6012 else: 6013 e = self._to_prop_eq(e, index) 6014 6015 transformed.append(e) 6016 6017 return transformed 6018 6019 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6020 return self._parse_statement() 6021 6022 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6023 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6024 6025 def _parse_user_defined_function( 6026 self, kind: t.Optional[TokenType] = None 6027 ) -> t.Optional[exp.Expression]: 6028 this = self._parse_table_parts(schema=True) 6029 6030 if not self._match(TokenType.L_PAREN): 6031 return this 6032 6033 expressions = self._parse_csv(self._parse_function_parameter) 6034 self._match_r_paren() 6035 return self.expression( 6036 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6037 ) 6038 6039 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6040 literal = self._parse_primary() 6041 if literal: 6042 return self.expression(exp.Introducer, this=token.text, expression=literal) 6043 6044 return self._identifier_expression(token) 6045 6046 def _parse_session_parameter(self) -> exp.SessionParameter: 6047 kind = None 6048 this = self._parse_id_var() or self._parse_primary() 6049 6050 if this and self._match(TokenType.DOT): 6051 kind = this.name 6052 this = self._parse_var() or self._parse_primary() 6053 6054 return self.expression(exp.SessionParameter, this=this, kind=kind) 6055 6056 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6057 return self._parse_id_var() 6058 6059 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6060 index = self._index 6061 6062 if self._match(TokenType.L_PAREN): 6063 expressions = t.cast( 6064 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6065 ) 6066 6067 if not self._match(TokenType.R_PAREN): 6068 self._retreat(index) 6069 else: 6070 expressions = [self._parse_lambda_arg()] 6071 6072 if self._match_set(self.LAMBDAS): 6073 return self.LAMBDAS[self._prev.token_type](self, expressions) 6074 6075 self._retreat(index) 6076 6077 this: t.Optional[exp.Expression] 6078 6079 if self._match(TokenType.DISTINCT): 6080 this = self.expression( 6081 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6082 ) 6083 else: 6084 this = self._parse_select_or_expression(alias=alias) 6085 6086 return self._parse_limit( 6087 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6088 ) 6089 6090 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6091 index = self._index 6092 if not self._match(TokenType.L_PAREN): 6093 return this 6094 6095 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6096 # expr can be of both types 6097 if self._match_set(self.SELECT_START_TOKENS): 6098 self._retreat(index) 6099 return this 6100 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6101 self._match_r_paren() 6102 return self.expression(exp.Schema, this=this, expressions=args) 6103 6104 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6105 return self._parse_column_def(self._parse_field(any_token=True)) 6106 6107 def _parse_column_def( 6108 self, this: t.Optional[exp.Expression], computed_column: bool = True 6109 ) -> t.Optional[exp.Expression]: 6110 # column defs are not really columns, they're identifiers 6111 if isinstance(this, exp.Column): 6112 this = this.this 6113 6114 if not computed_column: 6115 self._match(TokenType.ALIAS) 6116 6117 kind = self._parse_types(schema=True) 6118 6119 if self._match_text_seq("FOR", "ORDINALITY"): 6120 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6121 6122 constraints: t.List[exp.Expression] = [] 6123 6124 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6125 ("ALIAS", "MATERIALIZED") 6126 ): 6127 persisted = self._prev.text.upper() == "MATERIALIZED" 6128 constraint_kind = exp.ComputedColumnConstraint( 6129 this=self._parse_assignment(), 6130 persisted=persisted or self._match_text_seq("PERSISTED"), 6131 data_type=exp.Var(this="AUTO") 6132 if self._match_text_seq("AUTO") 6133 else self._parse_types(), 6134 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6135 ) 6136 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6137 elif ( 6138 kind 6139 and self._match(TokenType.ALIAS, advance=False) 6140 and ( 6141 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6142 or (self._next and self._next.token_type == TokenType.L_PAREN) 6143 ) 6144 ): 6145 self._advance() 6146 constraints.append( 6147 self.expression( 6148 exp.ColumnConstraint, 6149 kind=exp.ComputedColumnConstraint( 6150 this=self._parse_disjunction(), 6151 persisted=self._match_texts(("STORED", "VIRTUAL")) 6152 and self._prev.text.upper() == "STORED", 6153 ), 6154 ) 6155 ) 6156 6157 while True: 6158 constraint = self._parse_column_constraint() 6159 if not constraint: 6160 break 6161 constraints.append(constraint) 6162 6163 if not kind and not constraints: 6164 return this 6165 6166 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6167 6168 def _parse_auto_increment( 6169 self, 6170 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6171 start = None 6172 increment = None 6173 order = None 6174 6175 if self._match(TokenType.L_PAREN, advance=False): 6176 args = self._parse_wrapped_csv(self._parse_bitwise) 6177 start = seq_get(args, 0) 6178 increment = seq_get(args, 1) 6179 elif self._match_text_seq("START"): 6180 start = self._parse_bitwise() 6181 self._match_text_seq("INCREMENT") 6182 increment = self._parse_bitwise() 6183 if self._match_text_seq("ORDER"): 6184 order = True 6185 elif self._match_text_seq("NOORDER"): 6186 order = False 6187 6188 if start and increment: 6189 return exp.GeneratedAsIdentityColumnConstraint( 6190 start=start, increment=increment, this=False, order=order 6191 ) 6192 6193 return exp.AutoIncrementColumnConstraint() 6194 6195 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6196 if not self._match_text_seq("REFRESH"): 6197 self._retreat(self._index - 1) 6198 return None 6199 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6200 6201 def _parse_compress(self) -> exp.CompressColumnConstraint: 6202 if self._match(TokenType.L_PAREN, advance=False): 6203 return self.expression( 6204 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6205 ) 6206 6207 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6208 6209 def _parse_generated_as_identity( 6210 self, 6211 ) -> ( 6212 exp.GeneratedAsIdentityColumnConstraint 6213 | exp.ComputedColumnConstraint 6214 | exp.GeneratedAsRowColumnConstraint 6215 ): 6216 if self._match_text_seq("BY", "DEFAULT"): 6217 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6218 this = self.expression( 6219 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6220 ) 6221 else: 6222 self._match_text_seq("ALWAYS") 6223 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6224 6225 self._match(TokenType.ALIAS) 6226 6227 if self._match_text_seq("ROW"): 6228 start = self._match_text_seq("START") 6229 if not start: 6230 self._match(TokenType.END) 6231 hidden = self._match_text_seq("HIDDEN") 6232 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6233 6234 identity = self._match_text_seq("IDENTITY") 6235 6236 if self._match(TokenType.L_PAREN): 6237 if self._match(TokenType.START_WITH): 6238 this.set("start", self._parse_bitwise()) 6239 if self._match_text_seq("INCREMENT", "BY"): 6240 this.set("increment", self._parse_bitwise()) 6241 if self._match_text_seq("MINVALUE"): 6242 this.set("minvalue", self._parse_bitwise()) 6243 if self._match_text_seq("MAXVALUE"): 6244 this.set("maxvalue", self._parse_bitwise()) 6245 6246 if self._match_text_seq("CYCLE"): 6247 this.set("cycle", True) 6248 elif self._match_text_seq("NO", "CYCLE"): 6249 this.set("cycle", False) 6250 6251 if not identity: 6252 this.set("expression", self._parse_range()) 6253 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6254 args = self._parse_csv(self._parse_bitwise) 6255 this.set("start", seq_get(args, 0)) 6256 this.set("increment", seq_get(args, 1)) 6257 6258 self._match_r_paren() 6259 6260 return this 6261 6262 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6263 self._match_text_seq("LENGTH") 6264 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6265 6266 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6267 if self._match_text_seq("NULL"): 6268 return self.expression(exp.NotNullColumnConstraint) 6269 if self._match_text_seq("CASESPECIFIC"): 6270 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6271 if self._match_text_seq("FOR", "REPLICATION"): 6272 return self.expression(exp.NotForReplicationColumnConstraint) 6273 6274 # Unconsume the `NOT` token 6275 self._retreat(self._index - 1) 6276 return None 6277 6278 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6279 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6280 6281 procedure_option_follows = ( 6282 self._match(TokenType.WITH, advance=False) 6283 and self._next 6284 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6285 ) 6286 6287 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6288 return self.expression( 6289 exp.ColumnConstraint, 6290 this=this, 6291 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6292 ) 6293 6294 return this 6295 6296 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6297 if not 
self._match(TokenType.CONSTRAINT): 6298 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6299 6300 return self.expression( 6301 exp.Constraint, 6302 this=self._parse_id_var(), 6303 expressions=self._parse_unnamed_constraints(), 6304 ) 6305 6306 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6307 constraints = [] 6308 while True: 6309 constraint = self._parse_unnamed_constraint() or self._parse_function() 6310 if not constraint: 6311 break 6312 constraints.append(constraint) 6313 6314 return constraints 6315 6316 def _parse_unnamed_constraint( 6317 self, constraints: t.Optional[t.Collection[str]] = None 6318 ) -> t.Optional[exp.Expression]: 6319 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6320 constraints or self.CONSTRAINT_PARSERS 6321 ): 6322 return None 6323 6324 constraint = self._prev.text.upper() 6325 if constraint not in self.CONSTRAINT_PARSERS: 6326 self.raise_error(f"No parser found for schema constraint {constraint}.") 6327 6328 return self.CONSTRAINT_PARSERS[constraint](self) 6329 6330 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6331 return self._parse_id_var(any_token=False) 6332 6333 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6334 self._match_texts(("KEY", "INDEX")) 6335 return self.expression( 6336 exp.UniqueColumnConstraint, 6337 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6338 this=self._parse_schema(self._parse_unique_key()), 6339 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6340 on_conflict=self._parse_on_conflict(), 6341 options=self._parse_key_constraint_options(), 6342 ) 6343 6344 def _parse_key_constraint_options(self) -> t.List[str]: 6345 options = [] 6346 while True: 6347 if not self._curr: 6348 break 6349 6350 if self._match(TokenType.ON): 6351 action = None 6352 on = self._advance_any() and self._prev.text 6353 6354 if self._match_text_seq("NO", "ACTION"): 6355 action = "NO ACTION" 6356 elif self._match_text_seq("CASCADE"): 6357 action = "CASCADE" 6358 elif self._match_text_seq("RESTRICT"): 6359 action = "RESTRICT" 6360 elif self._match_pair(TokenType.SET, TokenType.NULL): 6361 action = "SET NULL" 6362 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6363 action = "SET DEFAULT" 6364 else: 6365 self.raise_error("Invalid key constraint") 6366 6367 options.append(f"ON {on} {action}") 6368 else: 6369 var = self._parse_var_from_options( 6370 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6371 ) 6372 if not var: 6373 break 6374 options.append(var.name) 6375 6376 return options 6377 6378 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6379 if match and not self._match(TokenType.REFERENCES): 6380 return None 6381 6382 expressions = None 6383 this = self._parse_table(schema=True) 6384 options = self._parse_key_constraint_options() 6385 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6386 6387 def _parse_foreign_key(self) -> exp.ForeignKey: 6388 expressions = ( 6389 self._parse_wrapped_id_vars() 6390 if not self._match(TokenType.REFERENCES, advance=False) 6391 else None 6392 ) 6393 reference = self._parse_references() 6394 on_options = {} 6395 6396 while self._match(TokenType.ON): 6397 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6398 self.raise_error("Expected DELETE or UPDATE") 6399 6400 kind = self._prev.text.lower() 6401 6402 if self._match_text_seq("NO", "ACTION"): 6403 action = "NO ACTION" 6404 elif 
self._match(TokenType.SET): 6405 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6406 action = "SET " + self._prev.text.upper() 6407 else: 6408 self._advance() 6409 action = self._prev.text.upper() 6410 6411 on_options[kind] = action 6412 6413 return self.expression( 6414 exp.ForeignKey, 6415 expressions=expressions, 6416 reference=reference, 6417 options=self._parse_key_constraint_options(), 6418 **on_options, # type: ignore 6419 ) 6420 6421 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6422 return self._parse_ordered() or self._parse_field() 6423 6424 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6425 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6426 self._retreat(self._index - 1) 6427 return None 6428 6429 id_vars = self._parse_wrapped_id_vars() 6430 return self.expression( 6431 exp.PeriodForSystemTimeConstraint, 6432 this=seq_get(id_vars, 0), 6433 expression=seq_get(id_vars, 1), 6434 ) 6435 6436 def _parse_primary_key( 6437 self, wrapped_optional: bool = False, in_props: bool = False 6438 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6439 desc = ( 6440 self._match_set((TokenType.ASC, TokenType.DESC)) 6441 and self._prev.token_type == TokenType.DESC 6442 ) 6443 6444 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6445 return self.expression( 6446 exp.PrimaryKeyColumnConstraint, 6447 desc=desc, 6448 options=self._parse_key_constraint_options(), 6449 ) 6450 6451 expressions = self._parse_wrapped_csv( 6452 self._parse_primary_key_part, optional=wrapped_optional 6453 ) 6454 6455 return self.expression( 6456 exp.PrimaryKey, 6457 expressions=expressions, 6458 include=self._parse_index_params(), 6459 options=self._parse_key_constraint_options(), 6460 ) 6461 6462 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6463 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6464 6465 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6466 """ 6467 Parses a datetime column in ODBC format. We parse the column into the corresponding 6468 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6469 same as we did for `DATE('yyyy-mm-dd')`. 
6470 6471 Reference: 6472 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6473 """ 6474 self._match(TokenType.VAR) 6475 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6476 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6477 if not self._match(TokenType.R_BRACE): 6478 self.raise_error("Expected }") 6479 return expression 6480 6481 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6482 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6483 return this 6484 6485 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6486 map_token = seq_get(self._tokens, self._index - 2) 6487 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6488 else: 6489 parse_map = False 6490 6491 bracket_kind = self._prev.token_type 6492 if ( 6493 bracket_kind == TokenType.L_BRACE 6494 and self._curr 6495 and self._curr.token_type == TokenType.VAR 6496 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6497 ): 6498 return self._parse_odbc_datetime_literal() 6499 6500 expressions = self._parse_csv( 6501 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6502 ) 6503 6504 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6505 self.raise_error("Expected ]") 6506 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6507 self.raise_error("Expected }") 6508 6509 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6510 if bracket_kind == TokenType.L_BRACE: 6511 this = self.expression( 6512 exp.Struct, 6513 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6514 ) 6515 elif not this: 6516 this = build_array_constructor( 6517 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6518 ) 6519 else: 6520 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6521 if constructor_type: 6522 return build_array_constructor( 6523 constructor_type, 6524 args=expressions, 6525 bracket_kind=bracket_kind, 6526 dialect=self.dialect, 6527 ) 6528 6529 expressions = apply_index_offset( 6530 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6531 ) 6532 this = self.expression( 6533 exp.Bracket, 6534 this=this, 6535 expressions=expressions, 6536 comments=this.pop_comments(), 6537 ) 6538 6539 self._add_comments(this) 6540 return self._parse_bracket(this) 6541 6542 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6543 if self._match(TokenType.COLON): 6544 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6545 return this 6546 6547 def _parse_case(self) -> t.Optional[exp.Expression]: 6548 if self._match(TokenType.DOT, advance=False): 6549 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6550 self._retreat(self._index - 1) 6551 return None 6552 6553 ifs = [] 6554 default = None 6555 6556 comments = self._prev_comments 6557 expression = self._parse_assignment() 6558 6559 while self._match(TokenType.WHEN): 6560 this = self._parse_assignment() 6561 self._match(TokenType.THEN) 6562 then = self._parse_assignment() 6563 ifs.append(self.expression(exp.If, this=this, true=then)) 6564 6565 if self._match(TokenType.ELSE): 6566 default = self._parse_assignment() 6567 6568 if not self._match(TokenType.END): 6569 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6570 default 
= exp.column("interval") 6571 else: 6572 self.raise_error("Expected END after CASE", self._prev) 6573 6574 return self.expression( 6575 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6576 ) 6577 6578 def _parse_if(self) -> t.Optional[exp.Expression]: 6579 if self._match(TokenType.L_PAREN): 6580 args = self._parse_csv( 6581 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6582 ) 6583 this = self.validate_expression(exp.If.from_arg_list(args), args) 6584 self._match_r_paren() 6585 else: 6586 index = self._index - 1 6587 6588 if self.NO_PAREN_IF_COMMANDS and index == 0: 6589 return self._parse_as_command(self._prev) 6590 6591 condition = self._parse_assignment() 6592 6593 if not condition: 6594 self._retreat(index) 6595 return None 6596 6597 self._match(TokenType.THEN) 6598 true = self._parse_assignment() 6599 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6600 self._match(TokenType.END) 6601 this = self.expression(exp.If, this=condition, true=true, false=false) 6602 6603 return this 6604 6605 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6606 if not self._match_text_seq("VALUE", "FOR"): 6607 self._retreat(self._index - 1) 6608 return None 6609 6610 return self.expression( 6611 exp.NextValueFor, 6612 this=self._parse_column(), 6613 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6614 ) 6615 6616 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6617 this = self._parse_function() or self._parse_var_or_string(upper=True) 6618 6619 if self._match(TokenType.FROM): 6620 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6621 6622 if not self._match(TokenType.COMMA): 6623 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6624 6625 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6626 6627 def _parse_gap_fill(self) -> exp.GapFill: 6628 self._match(TokenType.TABLE) 6629 this = self._parse_table() 6630 6631 self._match(TokenType.COMMA) 6632 args = [this, *self._parse_csv(self._parse_lambda)] 6633 6634 gap_fill = exp.GapFill.from_arg_list(args) 6635 return self.validate_expression(gap_fill, args) 6636 6637 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6638 this = self._parse_assignment() 6639 6640 if not self._match(TokenType.ALIAS): 6641 if self._match(TokenType.COMMA): 6642 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6643 6644 self.raise_error("Expected AS after CAST") 6645 6646 fmt = None 6647 to = self._parse_types() 6648 6649 default = self._match(TokenType.DEFAULT) 6650 if default: 6651 default = self._parse_bitwise() 6652 self._match_text_seq("ON", "CONVERSION", "ERROR") 6653 6654 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6655 fmt_string = self._parse_string() 6656 fmt = self._parse_at_time_zone(fmt_string) 6657 6658 if not to: 6659 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6660 if to.this in exp.DataType.TEMPORAL_TYPES: 6661 this = self.expression( 6662 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6663 this=this, 6664 format=exp.Literal.string( 6665 format_time( 6666 fmt_string.this if fmt_string else "", 6667 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6668 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6669 ) 6670 ), 6671 safe=safe, 6672 ) 6673 6674 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6675 this.set("zone", 
fmt.args["zone"]) 6676 return this 6677 elif not to: 6678 self.raise_error("Expected TYPE after CAST") 6679 elif isinstance(to, exp.Identifier): 6680 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6681 elif to.this == exp.DataType.Type.CHAR: 6682 if self._match(TokenType.CHARACTER_SET): 6683 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6684 6685 return self.build_cast( 6686 strict=strict, 6687 this=this, 6688 to=to, 6689 format=fmt, 6690 safe=safe, 6691 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6692 default=default, 6693 ) 6694 6695 def _parse_string_agg(self) -> exp.GroupConcat: 6696 if self._match(TokenType.DISTINCT): 6697 args: t.List[t.Optional[exp.Expression]] = [ 6698 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6699 ] 6700 if self._match(TokenType.COMMA): 6701 args.extend(self._parse_csv(self._parse_assignment)) 6702 else: 6703 args = self._parse_csv(self._parse_assignment) # type: ignore 6704 6705 if self._match_text_seq("ON", "OVERFLOW"): 6706 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6707 if self._match_text_seq("ERROR"): 6708 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6709 else: 6710 self._match_text_seq("TRUNCATE") 6711 on_overflow = self.expression( 6712 exp.OverflowTruncateBehavior, 6713 this=self._parse_string(), 6714 with_count=( 6715 self._match_text_seq("WITH", "COUNT") 6716 or not self._match_text_seq("WITHOUT", "COUNT") 6717 ), 6718 ) 6719 else: 6720 on_overflow = None 6721 6722 index = self._index 6723 if not self._match(TokenType.R_PAREN) and args: 6724 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6725 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6726 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6727 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6728 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6729 6730 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6731 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6732 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6733 if not self._match_text_seq("WITHIN", "GROUP"): 6734 self._retreat(index) 6735 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6736 6737 # The corresponding match_r_paren will be called in parse_function (caller) 6738 self._match_l_paren() 6739 6740 return self.expression( 6741 exp.GroupConcat, 6742 this=self._parse_order(this=seq_get(args, 0)), 6743 separator=seq_get(args, 1), 6744 on_overflow=on_overflow, 6745 ) 6746 6747 def _parse_convert( 6748 self, strict: bool, safe: t.Optional[bool] = None 6749 ) -> t.Optional[exp.Expression]: 6750 this = self._parse_bitwise() 6751 6752 if self._match(TokenType.USING): 6753 to: t.Optional[exp.Expression] = self.expression( 6754 exp.CharacterSet, this=self._parse_var() 6755 ) 6756 elif self._match(TokenType.COMMA): 6757 to = self._parse_types() 6758 else: 6759 to = None 6760 6761 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6762 6763 def _parse_xml_table(self) -> exp.XMLTable: 6764 namespaces = None 6765 passing = None 6766 columns = None 6767 6768 if self._match_text_seq("XMLNAMESPACES", "("): 6769 namespaces = self._parse_xml_namespace() 6770 self._match_text_seq(")", ",") 6771 6772 this = self._parse_string() 6773 6774 if self._match_text_seq("PASSING"): 6775 # The BY VALUE keywords are optional and are provided for semantic clarity 6776 self._match_text_seq("BY", "VALUE") 6777 passing = self._parse_csv(self._parse_column) 6778 6779 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6780 6781 if self._match_text_seq("COLUMNS"): 6782 columns = self._parse_csv(self._parse_field_def) 6783 6784 return self.expression( 6785 exp.XMLTable, 6786 this=this, 6787 namespaces=namespaces, 6788 passing=passing, 6789 columns=columns, 6790 by_ref=by_ref, 6791 ) 6792 6793 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6794 namespaces = [] 6795 6796 while True: 6797 if self._match(TokenType.DEFAULT): 6798 uri = self._parse_string() 6799 else: 6800 uri = self._parse_alias(self._parse_string()) 6801 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6802 if not self._match(TokenType.COMMA): 6803 break 6804 6805 return namespaces 6806 6807 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6808 args = self._parse_csv(self._parse_assignment) 6809 6810 if len(args) < 3: 6811 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6812 6813 return self.expression(exp.DecodeCase, expressions=args) 6814 6815 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6816 self._match_text_seq("KEY") 6817 key = self._parse_column() 6818 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6819 self._match_text_seq("VALUE") 6820 value = self._parse_bitwise() 6821 6822 if not key and not value: 6823 return None 6824 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6825 6826 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6827 if not this or not self._match_text_seq("FORMAT", "JSON"): 6828 return this 6829 6830 return self.expression(exp.FormatJson, this=this) 6831 6832 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6833 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6834 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6835 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6836 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6837 else: 6838 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6839 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6840 6841 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6842 6843 if not empty and not error and not null: 6844 return None 6845 6846 return self.expression( 6847 exp.OnCondition, 6848 empty=empty, 6849 error=error, 6850 null=null, 6851 ) 6852 6853 def _parse_on_handling( 6854 self, on: str, *values: str 6855 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6856 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6857 for value in values: 6858 if self._match_text_seq(value, "ON", on): 6859 return f"{value} ON {on}" 6860 6861 index = self._index 6862 if self._match(TokenType.DEFAULT): 6863 default_value = self._parse_bitwise() 6864 if self._match_text_seq("ON", on): 6865 return default_value 6866 6867 self._retreat(index) 6868 6869 return None 6870 6871 @t.overload 6872 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6873 6874 @t.overload 6875 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6876 6877 def _parse_json_object(self, agg=False): 6878 star = self._parse_star() 6879 expressions = ( 6880 [star] 6881 if star 6882 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6883 ) 6884 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6885 6886 unique_keys = None 6887 if self._match_text_seq("WITH", "UNIQUE"): 6888 unique_keys = True 6889 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6890 unique_keys = False 6891 6892 self._match_text_seq("KEYS") 6893 6894 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6895 self._parse_type() 6896 ) 6897 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6898 6899 return self.expression( 6900 exp.JSONObjectAgg if agg else exp.JSONObject, 6901 expressions=expressions, 6902 null_handling=null_handling, 6903 unique_keys=unique_keys, 6904 return_type=return_type, 6905 encoding=encoding, 6906 ) 6907 6908 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6909 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6910 if not self._match_text_seq("NESTED"): 6911 this = self._parse_id_var() 6912 kind = self._parse_types(allow_identifiers=False) 6913 nested = None 6914 else: 6915 this = None 6916 kind = None 6917 nested = True 6918 6919 path = self._match_text_seq("PATH") and self._parse_string() 6920 nested_schema = nested and self._parse_json_schema() 6921 6922 return self.expression( 6923 exp.JSONColumnDef, 6924 this=this, 6925 kind=kind, 6926 path=path, 6927 nested_schema=nested_schema, 6928 ) 6929 6930 def _parse_json_schema(self) -> exp.JSONSchema: 6931 self._match_text_seq("COLUMNS") 6932 return self.expression( 6933 exp.JSONSchema, 6934 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6935 ) 6936 6937 def _parse_json_table(self) -> exp.JSONTable: 6938 this = self._parse_format_json(self._parse_bitwise()) 6939 path = self._match(TokenType.COMMA) and self._parse_string() 6940 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6941 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6942 schema = 
self._parse_json_schema() 6943 6944 return exp.JSONTable( 6945 this=this, 6946 schema=schema, 6947 path=path, 6948 error_handling=error_handling, 6949 empty_handling=empty_handling, 6950 ) 6951 6952 def _parse_match_against(self) -> exp.MatchAgainst: 6953 if self._match_text_seq("TABLE"): 6954 # parse SingleStore MATCH(TABLE ...) syntax 6955 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6956 expressions = [] 6957 table = self._parse_table() 6958 if table: 6959 expressions = [table] 6960 else: 6961 expressions = self._parse_csv(self._parse_column) 6962 6963 self._match_text_seq(")", "AGAINST", "(") 6964 6965 this = self._parse_string() 6966 6967 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6968 modifier = "IN NATURAL LANGUAGE MODE" 6969 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6970 modifier = f"{modifier} WITH QUERY EXPANSION" 6971 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6972 modifier = "IN BOOLEAN MODE" 6973 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6974 modifier = "WITH QUERY EXPANSION" 6975 else: 6976 modifier = None 6977 6978 return self.expression( 6979 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6980 ) 6981 6982 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6983 def _parse_open_json(self) -> exp.OpenJSON: 6984 this = self._parse_bitwise() 6985 path = self._match(TokenType.COMMA) and self._parse_string() 6986 6987 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6988 this = self._parse_field(any_token=True) 6989 kind = self._parse_types() 6990 path = self._parse_string() 6991 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6992 6993 return self.expression( 6994 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6995 ) 6996 6997 expressions = None 6998 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6999 self._match_l_paren() 7000 expressions = self._parse_csv(_parse_open_json_column_def) 7001 7002 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7003 7004 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7005 args = self._parse_csv(self._parse_bitwise) 7006 7007 if self._match(TokenType.IN): 7008 return self.expression( 7009 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7010 ) 7011 7012 if haystack_first: 7013 haystack = seq_get(args, 0) 7014 needle = seq_get(args, 1) 7015 else: 7016 haystack = seq_get(args, 1) 7017 needle = seq_get(args, 0) 7018 7019 return self.expression( 7020 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7021 ) 7022 7023 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7024 args = self._parse_csv(self._parse_table) 7025 return exp.JoinHint(this=func_name.upper(), expressions=args) 7026 7027 def _parse_substring(self) -> exp.Substring: 7028 # Postgres supports the form: substring(string [from int] [for int]) 7029 # (despite being undocumented, the reverse order also works) 7030 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7031 7032 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7033 7034 start, length = None, None 7035 7036 while self._curr: 7037 if self._match(TokenType.FROM): 7038 start = self._parse_bitwise() 7039 elif self._match(TokenType.FOR): 7040 if not start: 7041 start = exp.Literal.number(1) 7042 length = self._parse_bitwise() 7043 
else: 7044 break 7045 7046 if start: 7047 args.append(start) 7048 if length: 7049 args.append(length) 7050 7051 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7052 7053 def _parse_trim(self) -> exp.Trim: 7054 # https://www.w3resource.com/sql/character-functions/trim.php 7055 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7056 7057 position = None 7058 collation = None 7059 expression = None 7060 7061 if self._match_texts(self.TRIM_TYPES): 7062 position = self._prev.text.upper() 7063 7064 this = self._parse_bitwise() 7065 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7066 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7067 expression = self._parse_bitwise() 7068 7069 if invert_order: 7070 this, expression = expression, this 7071 7072 if self._match(TokenType.COLLATE): 7073 collation = self._parse_bitwise() 7074 7075 return self.expression( 7076 exp.Trim, this=this, position=position, expression=expression, collation=collation 7077 ) 7078 7079 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7080 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7081 7082 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7083 return self._parse_window(self._parse_id_var(), alias=True) 7084 7085 def _parse_respect_or_ignore_nulls( 7086 self, this: t.Optional[exp.Expression] 7087 ) -> t.Optional[exp.Expression]: 7088 if self._match_text_seq("IGNORE", "NULLS"): 7089 return self.expression(exp.IgnoreNulls, this=this) 7090 if self._match_text_seq("RESPECT", "NULLS"): 7091 return self.expression(exp.RespectNulls, this=this) 7092 return this 7093 7094 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7095 if self._match(TokenType.HAVING): 7096 self._match_texts(("MAX", "MIN")) 7097 max = self._prev.text.upper() != "MIN" 7098 return self.expression( 7099 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7100 ) 7101 7102 return this 7103 7104 def _parse_window( 7105 self, this: t.Optional[exp.Expression], alias: bool = False 7106 ) -> t.Optional[exp.Expression]: 7107 func = this 7108 comments = func.comments if isinstance(func, exp.Expression) else None 7109 7110 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7111 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7112 if self._match_text_seq("WITHIN", "GROUP"): 7113 order = self._parse_wrapped(self._parse_order) 7114 this = self.expression(exp.WithinGroup, this=this, expression=order) 7115 7116 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7117 self._match(TokenType.WHERE) 7118 this = self.expression( 7119 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7120 ) 7121 self._match_r_paren() 7122 7123 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7124 # Some dialects choose to implement and some do not. 7125 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7126 7127 # There is some code above in _parse_lambda that handles 7128 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7129 7130 # The below changes handle 7131 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
7132 7133 # Oracle allows both formats 7134 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7135 # and Snowflake chose to do the same for familiarity 7136 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7137 if isinstance(this, exp.AggFunc): 7138 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7139 7140 if ignore_respect and ignore_respect is not this: 7141 ignore_respect.replace(ignore_respect.this) 7142 this = self.expression(ignore_respect.__class__, this=this) 7143 7144 this = self._parse_respect_or_ignore_nulls(this) 7145 7146 # bigquery select from window x AS (partition by ...) 7147 if alias: 7148 over = None 7149 self._match(TokenType.ALIAS) 7150 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7151 return this 7152 else: 7153 over = self._prev.text.upper() 7154 7155 if comments and isinstance(func, exp.Expression): 7156 func.pop_comments() 7157 7158 if not self._match(TokenType.L_PAREN): 7159 return self.expression( 7160 exp.Window, 7161 comments=comments, 7162 this=this, 7163 alias=self._parse_id_var(False), 7164 over=over, 7165 ) 7166 7167 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7168 7169 first = self._match(TokenType.FIRST) 7170 if self._match_text_seq("LAST"): 7171 first = False 7172 7173 partition, order = self._parse_partition_and_order() 7174 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7175 7176 if kind: 7177 self._match(TokenType.BETWEEN) 7178 start = self._parse_window_spec() 7179 7180 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7181 exclude = ( 7182 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7183 if self._match_text_seq("EXCLUDE") 7184 else None 7185 ) 7186 7187 spec = self.expression( 7188 exp.WindowSpec, 7189 kind=kind, 7190 start=start["value"], 7191 start_side=start["side"], 7192 end=end.get("value"), 7193 end_side=end.get("side"), 7194 exclude=exclude, 7195 ) 7196 else: 7197 spec = None 7198 7199 self._match_r_paren() 7200 7201 window = self.expression( 7202 exp.Window, 7203 comments=comments, 7204 this=this, 7205 partition_by=partition, 7206 order=order, 7207 spec=spec, 7208 alias=window_alias, 7209 over=over, 7210 first=first, 7211 ) 7212 7213 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
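        # A rough worked example (Oracle syntax; a sketch, not an exhaustive spec): in
        #   MAX(sal) KEEP (DENSE_RANK FIRST ORDER BY hiredate) OVER (PARTITION BY deptno)
        # the KEEP (...) clause is parsed into a Window first, and the trailing OVER (...) is
        # then picked up by the recursive _parse_window call below, wrapping that Window.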
7214 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7215 return self._parse_window(window, alias=alias) 7216 7217 return window 7218 7219 def _parse_partition_and_order( 7220 self, 7221 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7222 return self._parse_partition_by(), self._parse_order() 7223 7224 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7225 self._match(TokenType.BETWEEN) 7226 7227 return { 7228 "value": ( 7229 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7230 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7231 or self._parse_type() 7232 ), 7233 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7234 } 7235 7236 def _parse_alias( 7237 self, this: t.Optional[exp.Expression], explicit: bool = False 7238 ) -> t.Optional[exp.Expression]: 7239 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7240 # so this section tries to parse the clause version and if it fails, it treats the token 7241 # as an identifier (alias) 7242 if self._can_parse_limit_or_offset(): 7243 return this 7244 7245 any_token = self._match(TokenType.ALIAS) 7246 comments = self._prev_comments or [] 7247 7248 if explicit and not any_token: 7249 return this 7250 7251 if self._match(TokenType.L_PAREN): 7252 aliases = self.expression( 7253 exp.Aliases, 7254 comments=comments, 7255 this=this, 7256 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7257 ) 7258 self._match_r_paren(aliases) 7259 return aliases 7260 7261 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7262 self.STRING_ALIASES and self._parse_string_as_identifier() 7263 ) 7264 7265 if alias: 7266 comments.extend(alias.pop_comments()) 7267 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7268 column = this.this 7269 7270 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7271 if not this.comments and column and column.comments: 7272 this.comments = column.pop_comments() 7273 7274 return this 7275 7276 def _parse_id_var( 7277 self, 7278 any_token: bool = True, 7279 tokens: t.Optional[t.Collection[TokenType]] = None, 7280 ) -> t.Optional[exp.Expression]: 7281 expression = self._parse_identifier() 7282 if not expression and ( 7283 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7284 ): 7285 quoted = self._prev.token_type == TokenType.STRING 7286 expression = self._identifier_expression(quoted=quoted) 7287 7288 return expression 7289 7290 def _parse_string(self) -> t.Optional[exp.Expression]: 7291 if self._match_set(self.STRING_PARSERS): 7292 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7293 return self._parse_placeholder() 7294 7295 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7296 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7297 if output: 7298 output.update_positions(self._prev) 7299 return output 7300 7301 def _parse_number(self) -> t.Optional[exp.Expression]: 7302 if self._match_set(self.NUMERIC_PARSERS): 7303 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7304 return self._parse_placeholder() 7305 7306 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7307 if self._match(TokenType.IDENTIFIER): 7308 return self._identifier_expression(quoted=True) 7309 return self._parse_placeholder() 7310 7311 def _parse_var( 7312 self, 7313 any_token: bool = False, 7314 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7315 upper: bool = False, 7316 ) -> t.Optional[exp.Expression]: 7317 if ( 7318 (any_token and self._advance_any()) 7319 or self._match(TokenType.VAR) 7320 or (self._match_set(tokens) if tokens else False) 7321 ): 7322 return self.expression( 7323 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7324 ) 7325 return self._parse_placeholder() 7326 7327 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7328 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7329 self._advance() 7330 return self._prev 7331 return None 7332 7333 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7334 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7335 7336 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7337 return self._parse_primary() or self._parse_var(any_token=True) 7338 7339 def _parse_null(self) -> t.Optional[exp.Expression]: 7340 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7341 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7342 return self._parse_placeholder() 7343 7344 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7345 if self._match(TokenType.TRUE): 7346 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7347 if self._match(TokenType.FALSE): 7348 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7349 return self._parse_placeholder() 7350 7351 def _parse_star(self) -> t.Optional[exp.Expression]: 7352 if self._match(TokenType.STAR): 7353 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7354 return self._parse_placeholder() 7355 7356 def _parse_parameter(self) -> exp.Parameter: 7357 this = self._parse_identifier() or self._parse_primary_or_var() 7358 return self.expression(exp.Parameter, this=this) 7359 7360 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7361 if self._match_set(self.PLACEHOLDER_PARSERS): 7362 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7363 if placeholder: 7364 return placeholder 7365 self._advance(-1) 7366 return None 7367 7368 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7369 if not self._match_texts(keywords): 7370 return None 7371 if self._match(TokenType.L_PAREN, advance=False): 7372 return self._parse_wrapped_csv(self._parse_expression) 7373 7374 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7375 return [expression] if expression else None 7376 7377 def _parse_csv( 7378 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7379 ) -> t.List[exp.Expression]: 7380 parse_result = parse_method() 7381 items = [parse_result] if parse_result is not None else [] 7382 7383 while self._match(sep): 7384 self._add_comments(parse_result) 7385 parse_result = parse_method() 7386 if parse_result is not None: 7387 items.append(parse_result) 7388 7389 return items 7390 7391 def _parse_tokens( 7392 self, parse_method: t.Callable, expressions: t.Dict 7393 ) -> t.Optional[exp.Expression]: 7394 this = parse_method() 7395 7396 while self._match_set(expressions): 7397 this = self.expression( 7398 expressions[self._prev.token_type], 7399 this=this, 7400 comments=self._prev_comments, 7401 expression=parse_method(), 7402 ) 7403 7404 return this 7405 7406 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7407 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7408 7409 def 
_parse_wrapped_csv( 7410 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7411 ) -> t.List[exp.Expression]: 7412 return self._parse_wrapped( 7413 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7414 ) 7415 7416 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7417 wrapped = self._match(TokenType.L_PAREN) 7418 if not wrapped and not optional: 7419 self.raise_error("Expecting (") 7420 parse_result = parse_method() 7421 if wrapped: 7422 self._match_r_paren() 7423 return parse_result 7424 7425 def _parse_expressions(self) -> t.List[exp.Expression]: 7426 return self._parse_csv(self._parse_expression) 7427 7428 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7429 return ( 7430 self._parse_set_operations( 7431 self._parse_alias(self._parse_assignment(), explicit=True) 7432 if alias 7433 else self._parse_assignment() 7434 ) 7435 or self._parse_select() 7436 ) 7437 7438 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7439 return self._parse_query_modifiers( 7440 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7441 ) 7442 7443 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7444 this = None 7445 if self._match_texts(self.TRANSACTION_KIND): 7446 this = self._prev.text 7447 7448 self._match_texts(("TRANSACTION", "WORK")) 7449 7450 modes = [] 7451 while True: 7452 mode = [] 7453 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7454 mode.append(self._prev.text) 7455 7456 if mode: 7457 modes.append(" ".join(mode)) 7458 if not self._match(TokenType.COMMA): 7459 break 7460 7461 return self.expression(exp.Transaction, this=this, modes=modes) 7462 7463 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7464 chain = None 7465 savepoint = None 7466 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7467 7468 self._match_texts(("TRANSACTION", "WORK")) 7469 7470 if self._match_text_seq("TO"): 7471 self._match_text_seq("SAVEPOINT") 7472 savepoint = self._parse_id_var() 7473 7474 if self._match(TokenType.AND): 7475 chain = not self._match_text_seq("NO") 7476 self._match_text_seq("CHAIN") 7477 7478 if is_rollback: 7479 return self.expression(exp.Rollback, savepoint=savepoint) 7480 7481 return self.expression(exp.Commit, chain=chain) 7482 7483 def _parse_refresh(self) -> exp.Refresh: 7484 self._match(TokenType.TABLE) 7485 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7486 7487 def _parse_column_def_with_exists(self): 7488 start = self._index 7489 self._match(TokenType.COLUMN) 7490 7491 exists_column = self._parse_exists(not_=True) 7492 expression = self._parse_field_def() 7493 7494 if not isinstance(expression, exp.ColumnDef): 7495 self._retreat(start) 7496 return None 7497 7498 expression.set("exists", exists_column) 7499 7500 return expression 7501 7502 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7503 if not self._prev.text.upper() == "ADD": 7504 return None 7505 7506 expression = self._parse_column_def_with_exists() 7507 if not expression: 7508 return None 7509 7510 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7511 if self._match_texts(("FIRST", "AFTER")): 7512 position = self._prev.text 7513 column_position = self.expression( 7514 exp.ColumnPosition, this=self._parse_column(), position=position 7515 ) 7516 expression.set("position", column_position) 7517 7518 return 
expression 7519 7520 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7521 drop = self._match(TokenType.DROP) and self._parse_drop() 7522 if drop and not isinstance(drop, exp.Command): 7523 drop.set("kind", drop.args.get("kind", "COLUMN")) 7524 return drop 7525 7526 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7527 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7528 return self.expression( 7529 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7530 ) 7531 7532 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7533 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7534 self._match_text_seq("ADD") 7535 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7536 return self.expression( 7537 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7538 ) 7539 7540 column_def = self._parse_add_column() 7541 if isinstance(column_def, exp.ColumnDef): 7542 return column_def 7543 7544 exists = self._parse_exists(not_=True) 7545 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7546 return self.expression( 7547 exp.AddPartition, 7548 exists=exists, 7549 this=self._parse_field(any_token=True), 7550 location=self._match_text_seq("LOCATION", advance=False) 7551 and self._parse_property(), 7552 ) 7553 7554 return None 7555 7556 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7557 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7558 or self._match_text_seq("COLUMNS") 7559 ): 7560 schema = self._parse_schema() 7561 7562 return ( 7563 ensure_list(schema) 7564 if schema 7565 else self._parse_csv(self._parse_column_def_with_exists) 7566 ) 7567 7568 return self._parse_csv(_parse_add_alteration) 7569 7570 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7571 if self._match_texts(self.ALTER_ALTER_PARSERS): 7572 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7573 7574 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7575 # keyword after ALTER we default to parsing this statement 7576 self._match(TokenType.COLUMN) 7577 column = self._parse_field(any_token=True) 7578 7579 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7580 return self.expression(exp.AlterColumn, this=column, drop=True) 7581 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7582 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7583 if self._match(TokenType.COMMENT): 7584 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7585 if self._match_text_seq("DROP", "NOT", "NULL"): 7586 return self.expression( 7587 exp.AlterColumn, 7588 this=column, 7589 drop=True, 7590 allow_null=True, 7591 ) 7592 if self._match_text_seq("SET", "NOT", "NULL"): 7593 return self.expression( 7594 exp.AlterColumn, 7595 this=column, 7596 allow_null=False, 7597 ) 7598 7599 if self._match_text_seq("SET", "VISIBLE"): 7600 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7601 if self._match_text_seq("SET", "INVISIBLE"): 7602 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7603 7604 self._match_text_seq("SET", "DATA") 7605 self._match_text_seq("TYPE") 7606 return self.expression( 7607 exp.AlterColumn, 7608 this=column, 7609 dtype=self._parse_types(), 7610 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7611 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7612 ) 7613 7614 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7615 if self._match_texts(("ALL", "EVEN", "AUTO")): 7616 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7617 7618 self._match_text_seq("KEY", "DISTKEY") 7619 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7620 7621 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7622 if compound: 7623 self._match_text_seq("SORTKEY") 7624 7625 if self._match(TokenType.L_PAREN, advance=False): 7626 return self.expression( 7627 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7628 ) 7629 7630 self._match_texts(("AUTO", "NONE")) 7631 return self.expression( 7632 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7633 ) 7634 7635 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7636 index = self._index - 1 7637 7638 partition_exists = self._parse_exists() 7639 if self._match(TokenType.PARTITION, advance=False): 7640 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7641 7642 self._retreat(index) 7643 return self._parse_csv(self._parse_drop_column) 7644 7645 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7646 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7647 exists = self._parse_exists() 7648 old_column = self._parse_column() 7649 to = self._match_text_seq("TO") 7650 new_column = self._parse_column() 7651 7652 if old_column is None or to is None or new_column is None: 7653 return None 7654 7655 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7656 7657 self._match_text_seq("TO") 7658 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7659 7660 def _parse_alter_table_set(self) -> exp.AlterSet: 7661 alter_set = self.expression(exp.AlterSet) 7662 7663 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7664 "TABLE", "PROPERTIES" 7665 ): 7666 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7667 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7668 alter_set.set("expressions", [self._parse_assignment()]) 7669 elif self._match_texts(("LOGGED", "UNLOGGED")): 7670 alter_set.set("option", exp.var(self._prev.text.upper())) 7671 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7672 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7673 elif self._match_text_seq("LOCATION"): 7674 alter_set.set("location", self._parse_field()) 7675 elif self._match_text_seq("ACCESS", "METHOD"): 7676 alter_set.set("access_method", self._parse_field()) 7677 elif self._match_text_seq("TABLESPACE"): 7678 alter_set.set("tablespace", self._parse_field()) 7679 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7680 alter_set.set("file_format", [self._parse_field()]) 7681 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7682 alter_set.set("file_format", self._parse_wrapped_options()) 7683 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7684 alter_set.set("copy_options", self._parse_wrapped_options()) 7685 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7686 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7687 else: 7688 if self._match_text_seq("SERDE"): 7689 alter_set.set("serde", self._parse_field()) 7690 7691 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7692 alter_set.set("expressions", [properties]) 7693 7694 return alter_set 7695 7696 def _parse_alter_session(self) -> exp.AlterSession: 7697 """Parse ALTER SESSION SET/UNSET statements.""" 7698 if self._match(TokenType.SET): 7699 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7700 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7701 7702 self._match_text_seq("UNSET") 7703 expressions = self._parse_csv( 7704 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7705 ) 7706 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7707 7708 def _parse_alter(self) -> exp.Alter | exp.Command: 7709 start = self._prev 7710 7711 alter_token = self._match_set(self.ALTERABLES) and self._prev 7712 if not alter_token: 7713 return self._parse_as_command(start) 7714 7715 exists = self._parse_exists() 7716 only = self._match_text_seq("ONLY") 7717 7718 if alter_token.token_type == TokenType.SESSION: 7719 this = None 7720 check = None 7721 cluster = None 7722 else: 7723 this = self._parse_table(schema=True) 7724 check = self._match_text_seq("WITH", "CHECK") 7725 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7726 7727 if self._next: 7728 self._advance() 7729 7730 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7731 if parser: 7732 actions = ensure_list(parser(self)) 7733 not_valid = self._match_text_seq("NOT", "VALID") 7734 options = self._parse_csv(self._parse_property) 7735 7736 if not self._curr and actions: 7737 return self.expression( 7738 exp.Alter, 7739 this=this, 7740 kind=alter_token.text.upper(), 7741 exists=exists, 7742 actions=actions, 7743 only=only, 7744 options=options, 7745 cluster=cluster, 7746 not_valid=not_valid, 7747 check=check, 7748 ) 7749 7750 return self._parse_as_command(start) 7751 7752 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7753 start = self._prev 7754 # https://duckdb.org/docs/sql/statements/analyze 7755 if not self._curr: 7756 return self.expression(exp.Analyze) 7757 7758 options = [] 7759 while self._match_texts(self.ANALYZE_STYLES): 7760 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7761 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7762 else: 7763 options.append(self._prev.text.upper()) 7764 7765 this: t.Optional[exp.Expression] = None 7766 inner_expression: t.Optional[exp.Expression] = None 7767 7768 kind = self._curr and self._curr.text.upper() 7769 7770 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7771 this = self._parse_table_parts() 7772 elif self._match_text_seq("TABLES"): 7773 if self._match_set((TokenType.FROM, TokenType.IN)): 7774 kind = f"{kind} {self._prev.text.upper()}" 7775 this = self._parse_table(schema=True, is_db_reference=True) 7776 elif self._match_text_seq("DATABASE"): 7777 this = self._parse_table(schema=True, is_db_reference=True) 7778 elif self._match_text_seq("CLUSTER"): 7779 this = self._parse_table() 7780 # Try matching inner expr keywords before fallback to parse table. 
7781 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7782 kind = None 7783 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7784 else: 7785 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7786 kind = None 7787 this = self._parse_table_parts() 7788 7789 partition = self._try_parse(self._parse_partition) 7790 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7791 return self._parse_as_command(start) 7792 7793 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7794 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7795 "WITH", "ASYNC", "MODE" 7796 ): 7797 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7798 else: 7799 mode = None 7800 7801 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7802 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7803 7804 properties = self._parse_properties() 7805 return self.expression( 7806 exp.Analyze, 7807 kind=kind, 7808 this=this, 7809 mode=mode, 7810 partition=partition, 7811 properties=properties, 7812 expression=inner_expression, 7813 options=options, 7814 ) 7815 7816 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7817 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7818 this = None 7819 kind = self._prev.text.upper() 7820 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7821 expressions = [] 7822 7823 if not self._match_text_seq("STATISTICS"): 7824 self.raise_error("Expecting token STATISTICS") 7825 7826 if self._match_text_seq("NOSCAN"): 7827 this = "NOSCAN" 7828 elif self._match(TokenType.FOR): 7829 if self._match_text_seq("ALL", "COLUMNS"): 7830 this = "FOR ALL COLUMNS" 7831 if self._match_texts("COLUMNS"): 7832 this = "FOR COLUMNS" 7833 expressions = self._parse_csv(self._parse_column_reference) 7834 elif self._match_text_seq("SAMPLE"): 7835 sample = self._parse_number() 7836 expressions = [ 7837 self.expression( 7838 exp.AnalyzeSample, 7839 sample=sample, 7840 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7841 ) 7842 ] 7843 7844 return self.expression( 7845 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7846 ) 7847 7848 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7849 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7850 kind = None 7851 this = None 7852 expression: t.Optional[exp.Expression] = None 7853 if self._match_text_seq("REF", "UPDATE"): 7854 kind = "REF" 7855 this = "UPDATE" 7856 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7857 this = "UPDATE SET DANGLING TO NULL" 7858 elif self._match_text_seq("STRUCTURE"): 7859 kind = "STRUCTURE" 7860 if self._match_text_seq("CASCADE", "FAST"): 7861 this = "CASCADE FAST" 7862 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7863 ("ONLINE", "OFFLINE") 7864 ): 7865 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7866 expression = self._parse_into() 7867 7868 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7869 7870 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7871 this = self._prev.text.upper() 7872 if self._match_text_seq("COLUMNS"): 7873 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7874 return None 7875 7876 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7877 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7878 if self._match_text_seq("STATISTICS"): 7879 return self.expression(exp.AnalyzeDelete, kind=kind) 7880 return None 7881 7882 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7883 if self._match_text_seq("CHAINED", "ROWS"): 7884 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7885 return None 7886 7887 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7888 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7889 this = self._prev.text.upper() 7890 expression: t.Optional[exp.Expression] = None 7891 expressions = [] 7892 update_options = None 7893 7894 if self._match_text_seq("HISTOGRAM", "ON"): 7895 expressions = self._parse_csv(self._parse_column_reference) 7896 with_expressions = [] 7897 while self._match(TokenType.WITH): 7898 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7899 if self._match_texts(("SYNC", "ASYNC")): 7900 if self._match_text_seq("MODE", advance=False): 7901 with_expressions.append(f"{self._prev.text.upper()} MODE") 7902 self._advance() 7903 else: 7904 buckets = self._parse_number() 7905 if self._match_text_seq("BUCKETS"): 7906 with_expressions.append(f"{buckets} BUCKETS") 7907 if with_expressions: 7908 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7909 7910 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7911 TokenType.UPDATE, advance=False 7912 ): 7913 update_options = self._prev.text.upper() 7914 self._advance() 7915 elif self._match_text_seq("USING", "DATA"): 7916 expression = self.expression(exp.UsingData, this=self._parse_string()) 7917 7918 return self.expression( 7919 exp.AnalyzeHistogram, 7920 this=this, 7921 expressions=expressions, 7922 expression=expression, 7923 update_options=update_options, 7924 ) 7925 7926 def _parse_merge(self) -> exp.Merge: 7927 self._match(TokenType.INTO) 7928 target = self._parse_table() 7929 7930 if target and self._match(TokenType.ALIAS, advance=False): 7931 target.set("alias", self._parse_table_alias()) 7932 7933 self._match(TokenType.USING) 7934 using = self._parse_table() 7935 7936 self._match(TokenType.ON) 7937 on = self._parse_assignment() 7938 7939 return self.expression( 7940 exp.Merge, 7941 this=target, 7942 using=using, 7943 on=on, 7944 whens=self._parse_when_matched(), 7945 returning=self._parse_returning(), 7946 ) 7947 7948 def _parse_when_matched(self) -> exp.Whens: 7949 whens = [] 7950 7951 while self._match(TokenType.WHEN): 7952 matched = not self._match(TokenType.NOT) 7953 self._match_text_seq("MATCHED") 7954 source = ( 7955 False 7956 if self._match_text_seq("BY", "TARGET") 7957 else self._match_text_seq("BY", "SOURCE") 7958 ) 7959 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7960 7961 self._match(TokenType.THEN) 7962 7963 if self._match(TokenType.INSERT): 7964 this = self._parse_star() 7965 if this: 7966 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7967 else: 7968 then = self.expression( 7969 exp.Insert, 7970 this=exp.var("ROW") 7971 if self._match_text_seq("ROW") 7972 else self._parse_value(values=False), 7973 expression=self._match_text_seq("VALUES") and self._parse_value(), 7974 ) 7975 elif self._match(TokenType.UPDATE): 7976 expressions = self._parse_star() 7977 if expressions: 7978 then = self.expression(exp.Update, expressions=expressions) 7979 else: 7980 then = self.expression( 7981 exp.Update, 7982 
expressions=self._match(TokenType.SET) 7983 and self._parse_csv(self._parse_equality), 7984 ) 7985 elif self._match(TokenType.DELETE): 7986 then = self.expression(exp.Var, this=self._prev.text) 7987 else: 7988 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7989 7990 whens.append( 7991 self.expression( 7992 exp.When, 7993 matched=matched, 7994 source=source, 7995 condition=condition, 7996 then=then, 7997 ) 7998 ) 7999 return self.expression(exp.Whens, expressions=whens) 8000 8001 def _parse_show(self) -> t.Optional[exp.Expression]: 8002 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8003 if parser: 8004 return parser(self) 8005 return self._parse_as_command(self._prev) 8006 8007 def _parse_set_item_assignment( 8008 self, kind: t.Optional[str] = None 8009 ) -> t.Optional[exp.Expression]: 8010 index = self._index 8011 8012 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8013 return self._parse_set_transaction(global_=kind == "GLOBAL") 8014 8015 left = self._parse_primary() or self._parse_column() 8016 assignment_delimiter = self._match_texts(("=", "TO")) 8017 8018 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8019 self._retreat(index) 8020 return None 8021 8022 right = self._parse_statement() or self._parse_id_var() 8023 if isinstance(right, (exp.Column, exp.Identifier)): 8024 right = exp.var(right.name) 8025 8026 this = self.expression(exp.EQ, this=left, expression=right) 8027 return self.expression(exp.SetItem, this=this, kind=kind) 8028 8029 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8030 self._match_text_seq("TRANSACTION") 8031 characteristics = self._parse_csv( 8032 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8033 ) 8034 return self.expression( 8035 exp.SetItem, 8036 expressions=characteristics, 8037 kind="TRANSACTION", 8038 **{"global": global_}, # type: ignore 8039 ) 8040 8041 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8042 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8043 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8044 8045 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8046 index = self._index 8047 set_ = self.expression( 8048 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8049 ) 8050 8051 if self._curr: 8052 self._retreat(index) 8053 return self._parse_as_command(self._prev) 8054 8055 return set_ 8056 8057 def _parse_var_from_options( 8058 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8059 ) -> t.Optional[exp.Var]: 8060 start = self._curr 8061 if not start: 8062 return None 8063 8064 option = start.text.upper() 8065 continuations = options.get(option) 8066 8067 index = self._index 8068 self._advance() 8069 for keywords in continuations or []: 8070 if isinstance(keywords, str): 8071 keywords = (keywords,) 8072 8073 if self._match_text_seq(*keywords): 8074 option = f"{option} {' '.join(keywords)}" 8075 break 8076 else: 8077 if continuations or continuations is None: 8078 if raise_unmatched: 8079 self.raise_error(f"Unknown option {option}") 8080 8081 self._retreat(index) 8082 return None 8083 8084 return exp.var(option) 8085 8086 def _parse_as_command(self, start: Token) -> exp.Command: 8087 while self._curr: 8088 self._advance() 8089 text = self._find_sql(start, self._prev) 8090 size = len(start.text) 8091 self._warn_unsupported() 8092 return exp.Command(this=text[:size], 
expression=text[size:]) 8093 8094 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8095 settings = [] 8096 8097 self._match_l_paren() 8098 kind = self._parse_id_var() 8099 8100 if self._match(TokenType.L_PAREN): 8101 while True: 8102 key = self._parse_id_var() 8103 value = self._parse_primary() 8104 if not key and value is None: 8105 break 8106 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8107 self._match(TokenType.R_PAREN) 8108 8109 self._match_r_paren() 8110 8111 return self.expression( 8112 exp.DictProperty, 8113 this=this, 8114 kind=kind.this if kind else None, 8115 settings=settings, 8116 ) 8117 8118 def _parse_dict_range(self, this: str) -> exp.DictRange: 8119 self._match_l_paren() 8120 has_min = self._match_text_seq("MIN") 8121 if has_min: 8122 min = self._parse_var() or self._parse_primary() 8123 self._match_text_seq("MAX") 8124 max = self._parse_var() or self._parse_primary() 8125 else: 8126 max = self._parse_var() or self._parse_primary() 8127 min = exp.Literal.number(0) 8128 self._match_r_paren() 8129 return self.expression(exp.DictRange, this=this, min=min, max=max) 8130 8131 def _parse_comprehension( 8132 self, this: t.Optional[exp.Expression] 8133 ) -> t.Optional[exp.Comprehension]: 8134 index = self._index 8135 expression = self._parse_column() 8136 if not self._match(TokenType.IN): 8137 self._retreat(index - 1) 8138 return None 8139 iterator = self._parse_column() 8140 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8141 return self.expression( 8142 exp.Comprehension, 8143 this=this, 8144 expression=expression, 8145 iterator=iterator, 8146 condition=condition, 8147 ) 8148 8149 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8150 if self._match(TokenType.HEREDOC_STRING): 8151 return self.expression(exp.Heredoc, this=self._prev.text) 8152 8153 if not self._match_text_seq("$"): 8154 return None 8155 8156 tags = ["$"] 8157 tag_text = None 8158 8159 if self._is_connected(): 8160 self._advance() 8161 tags.append(self._prev.text.upper()) 8162 else: 8163 self.raise_error("No closing $ found") 8164 8165 if tags[-1] != "$": 8166 if self._is_connected() and self._match_text_seq("$"): 8167 tag_text = tags[-1] 8168 tags.append("$") 8169 else: 8170 self.raise_error("No closing $ found") 8171 8172 heredoc_start = self._curr 8173 8174 while self._curr: 8175 if self._match_text_seq(*tags, advance=False): 8176 this = self._find_sql(heredoc_start, self._prev) 8177 self._advance(len(tags)) 8178 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8179 8180 self._advance() 8181 8182 self.raise_error(f"No closing {''.join(tags)} found") 8183 return None 8184 8185 def _find_parser( 8186 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8187 ) -> t.Optional[t.Callable]: 8188 if not self._curr: 8189 return None 8190 8191 index = self._index 8192 this = [] 8193 while True: 8194 # The current token might be multiple words 8195 curr = self._curr.text.upper() 8196 key = curr.split(" ") 8197 this.append(curr) 8198 8199 self._advance() 8200 result, trie = in_trie(trie, key) 8201 if result == TrieResult.FAILED: 8202 break 8203 8204 if result == TrieResult.EXISTS: 8205 subparser = parsers[" ".join(this)] 8206 return subparser 8207 8208 self._retreat(index) 8209 return None 8210 8211 def _match(self, token_type, advance=True, expression=None): 8212 if not self._curr: 8213 return None 8214 8215 if self._curr.token_type == token_type: 8216 if advance: 8217 self._advance() 8218 self._add_comments(expression) 8219 return 
True 8220 8221 return None 8222 8223 def _match_set(self, types, advance=True): 8224 if not self._curr: 8225 return None 8226 8227 if self._curr.token_type in types: 8228 if advance: 8229 self._advance() 8230 return True 8231 8232 return None 8233 8234 def _match_pair(self, token_type_a, token_type_b, advance=True): 8235 if not self._curr or not self._next: 8236 return None 8237 8238 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8239 if advance: 8240 self._advance(2) 8241 return True 8242 8243 return None 8244 8245 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8246 if not self._match(TokenType.L_PAREN, expression=expression): 8247 self.raise_error("Expecting (") 8248 8249 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8250 if not self._match(TokenType.R_PAREN, expression=expression): 8251 self.raise_error("Expecting )") 8252 8253 def _match_texts(self, texts, advance=True): 8254 if ( 8255 self._curr 8256 and self._curr.token_type != TokenType.STRING 8257 and self._curr.text.upper() in texts 8258 ): 8259 if advance: 8260 self._advance() 8261 return True 8262 return None 8263 8264 def _match_text_seq(self, *texts, advance=True): 8265 index = self._index 8266 for text in texts: 8267 if ( 8268 self._curr 8269 and self._curr.token_type != TokenType.STRING 8270 and self._curr.text.upper() == text 8271 ): 8272 self._advance() 8273 else: 8274 self._retreat(index) 8275 return None 8276 8277 if not advance: 8278 self._retreat(index) 8279 8280 return True 8281 8282 def _replace_lambda( 8283 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8284 ) -> t.Optional[exp.Expression]: 8285 if not node: 8286 return node 8287 8288 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8289 8290 for column in node.find_all(exp.Column): 8291 typ = lambda_types.get(column.parts[0].name) 8292 if typ is not None: 8293 dot_or_id = column.to_dot() if column.table else column.this 8294 8295 if typ: 8296 dot_or_id = self.expression( 8297 exp.Cast, 8298 this=dot_or_id, 8299 to=typ, 8300 ) 8301 8302 parent = column.parent 8303 8304 while isinstance(parent, exp.Dot): 8305 if not isinstance(parent.parent, exp.Dot): 8306 parent.replace(dot_or_id) 8307 break 8308 parent = parent.parent 8309 else: 8310 if column is node: 8311 node = dot_or_id 8312 else: 8313 column.replace(dot_or_id) 8314 return node 8315 8316 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8317 start = self._prev 8318 8319 # Not to be confused with TRUNCATE(number, decimals) function call 8320 if self._match(TokenType.L_PAREN): 8321 self._retreat(self._index - 2) 8322 return self._parse_function() 8323 8324 # Clickhouse supports TRUNCATE DATABASE as well 8325 is_database = self._match(TokenType.DATABASE) 8326 8327 self._match(TokenType.TABLE) 8328 8329 exists = self._parse_exists(not_=False) 8330 8331 expressions = self._parse_csv( 8332 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8333 ) 8334 8335 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8336 8337 if self._match_text_seq("RESTART", "IDENTITY"): 8338 identity = "RESTART" 8339 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8340 identity = "CONTINUE" 8341 else: 8342 identity = None 8343 8344 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8345 option = self._prev.text 8346 else: 8347 option = None 8348 8349 partition = self._parse_partition() 
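        # Rough illustration (Postgres-style input; behavior sketched from the branches above):
        #   TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE
        # would yield exp.TruncateTable(expressions=[t1, t2], identity="RESTART", option="CASCADE"),
        # provided no trailing tokens remain to trigger the Command fallback below.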
8350 8351 # Fallback case 8352 if self._curr: 8353 return self._parse_as_command(start) 8354 8355 return self.expression( 8356 exp.TruncateTable, 8357 expressions=expressions, 8358 is_database=is_database, 8359 exists=exists, 8360 cluster=cluster, 8361 identity=identity, 8362 option=option, 8363 partition=partition, 8364 ) 8365 8366 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8367 this = self._parse_ordered(self._parse_opclass) 8368 8369 if not self._match(TokenType.WITH): 8370 return this 8371 8372 op = self._parse_var(any_token=True) 8373 8374 return self.expression(exp.WithOperator, this=this, op=op) 8375 8376 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8377 self._match(TokenType.EQ) 8378 self._match(TokenType.L_PAREN) 8379 8380 opts: t.List[t.Optional[exp.Expression]] = [] 8381 option: exp.Expression | None 8382 while self._curr and not self._match(TokenType.R_PAREN): 8383 if self._match_text_seq("FORMAT_NAME", "="): 8384 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8385 option = self._parse_format_name() 8386 else: 8387 option = self._parse_property() 8388 8389 if option is None: 8390 self.raise_error("Unable to parse option") 8391 break 8392 8393 opts.append(option) 8394 8395 return opts 8396 8397 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8398 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8399 8400 options = [] 8401 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8402 option = self._parse_var(any_token=True) 8403 prev = self._prev.text.upper() 8404 8405 # Different dialects might separate options and values by white space, "=" and "AS" 8406 self._match(TokenType.EQ) 8407 self._match(TokenType.ALIAS) 8408 8409 param = self.expression(exp.CopyParameter, this=option) 8410 8411 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8412 TokenType.L_PAREN, advance=False 8413 ): 8414 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8415 param.set("expressions", self._parse_wrapped_options()) 8416 elif prev == "FILE_FORMAT": 8417 # T-SQL's external file format case 8418 param.set("expression", self._parse_field()) 8419 else: 8420 param.set("expression", self._parse_unquoted_field()) 8421 8422 options.append(param) 8423 self._match(sep) 8424 8425 return options 8426 8427 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8428 expr = self.expression(exp.Credentials) 8429 8430 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8431 expr.set("storage", self._parse_field()) 8432 if self._match_text_seq("CREDENTIALS"): 8433 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8434 creds = ( 8435 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8436 ) 8437 expr.set("credentials", creds) 8438 if self._match_text_seq("ENCRYPTION"): 8439 expr.set("encryption", self._parse_wrapped_options()) 8440 if self._match_text_seq("IAM_ROLE"): 8441 expr.set("iam_role", self._parse_field()) 8442 if self._match_text_seq("REGION"): 8443 expr.set("region", self._parse_field()) 8444 8445 return expr 8446 8447 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8448 return self._parse_field() 8449 8450 def _parse_copy(self) -> exp.Copy | exp.Command: 8451 start = self._prev 8452 8453 self._match(TokenType.INTO) 8454 8455 this = ( 8456 self._parse_select(nested=True, parse_subquery_alias=False) 8457 if self._match(TokenType.L_PAREN, advance=False) 8458 else self._parse_table(schema=True) 
8459 ) 8460 8461 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8462 8463 files = self._parse_csv(self._parse_file_location) 8464 if self._match(TokenType.EQ, advance=False): 8465 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8466 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8467 # list via `_parse_wrapped(..)` below. 8468 self._advance(-1) 8469 files = [] 8470 8471 credentials = self._parse_credentials() 8472 8473 self._match_text_seq("WITH") 8474 8475 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8476 8477 # Fallback case 8478 if self._curr: 8479 return self._parse_as_command(start) 8480 8481 return self.expression( 8482 exp.Copy, 8483 this=this, 8484 kind=kind, 8485 credentials=credentials, 8486 files=files, 8487 params=params, 8488 ) 8489 8490 def _parse_normalize(self) -> exp.Normalize: 8491 return self.expression( 8492 exp.Normalize, 8493 this=self._parse_bitwise(), 8494 form=self._match(TokenType.COMMA) and self._parse_var(), 8495 ) 8496 8497 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8498 args = self._parse_csv(lambda: self._parse_lambda()) 8499 8500 this = seq_get(args, 0) 8501 decimals = seq_get(args, 1) 8502 8503 return expr_type( 8504 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8505 ) 8506 8507 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8508 star_token = self._prev 8509 8510 if self._match_text_seq("COLUMNS", "(", advance=False): 8511 this = self._parse_function() 8512 if isinstance(this, exp.Columns): 8513 this.set("unpack", True) 8514 return this 8515 8516 return self.expression( 8517 exp.Star, 8518 **{ # type: ignore 8519 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8520 "replace": self._parse_star_op("REPLACE"), 8521 "rename": self._parse_star_op("RENAME"), 8522 }, 8523 ).update_positions(star_token) 8524 8525 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8526 privilege_parts = [] 8527 8528 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8529 # (end of privilege list) or L_PAREN (start of column list) are met 8530 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8531 privilege_parts.append(self._curr.text.upper()) 8532 self._advance() 8533 8534 this = exp.var(" ".join(privilege_parts)) 8535 expressions = ( 8536 self._parse_wrapped_csv(self._parse_column) 8537 if self._match(TokenType.L_PAREN, advance=False) 8538 else None 8539 ) 8540 8541 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8542 8543 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8544 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8545 principal = self._parse_id_var() 8546 8547 if not principal: 8548 return None 8549 8550 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8551 8552 def _parse_grant_revoke_common( 8553 self, 8554 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8555 privileges = self._parse_csv(self._parse_grant_privilege) 8556 8557 self._match(TokenType.ON) 8558 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8559 8560 # Attempt to parse the securable e.g. 
MySQL allows names 8561 # such as "foo.*", "*.*" which are not easily parseable yet 8562 securable = self._try_parse(self._parse_table_parts) 8563 8564 return privileges, kind, securable 8565 8566 def _parse_grant(self) -> exp.Grant | exp.Command: 8567 start = self._prev 8568 8569 privileges, kind, securable = self._parse_grant_revoke_common() 8570 8571 if not securable or not self._match_text_seq("TO"): 8572 return self._parse_as_command(start) 8573 8574 principals = self._parse_csv(self._parse_grant_principal) 8575 8576 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8577 8578 if self._curr: 8579 return self._parse_as_command(start) 8580 8581 return self.expression( 8582 exp.Grant, 8583 privileges=privileges, 8584 kind=kind, 8585 securable=securable, 8586 principals=principals, 8587 grant_option=grant_option, 8588 ) 8589 8590 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8591 start = self._prev 8592 8593 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8594 8595 privileges, kind, securable = self._parse_grant_revoke_common() 8596 8597 if not securable or not self._match_text_seq("FROM"): 8598 return self._parse_as_command(start) 8599 8600 principals = self._parse_csv(self._parse_grant_principal) 8601 8602 cascade = None 8603 if self._match_texts(("CASCADE", "RESTRICT")): 8604 cascade = self._prev.text.upper() 8605 8606 if self._curr: 8607 return self._parse_as_command(start) 8608 8609 return self.expression( 8610 exp.Revoke, 8611 privileges=privileges, 8612 kind=kind, 8613 securable=securable, 8614 principals=principals, 8615 grant_option=grant_option, 8616 cascade=cascade, 8617 ) 8618 8619 def _parse_overlay(self) -> exp.Overlay: 8620 return self.expression( 8621 exp.Overlay, 8622 **{ # type: ignore 8623 "this": self._parse_bitwise(), 8624 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8625 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8626 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8627 }, 8628 ) 8629 8630 def _parse_format_name(self) -> exp.Property: 8631 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8632 # for FILE_FORMAT = <format_name> 8633 return self.expression( 8634 exp.Property, 8635 this=exp.var("FORMAT_NAME"), 8636 value=self._parse_string() or self._parse_table_parts(), 8637 ) 8638 8639 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8640 args: t.List[exp.Expression] = [] 8641 8642 if self._match(TokenType.DISTINCT): 8643 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8644 self._match(TokenType.COMMA) 8645 8646 args.extend(self._parse_csv(self._parse_assignment)) 8647 8648 return self.expression( 8649 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8650 ) 8651 8652 def _identifier_expression( 8653 self, token: t.Optional[Token] = None, **kwargs: t.Any 8654 ) -> exp.Identifier: 8655 token = token or self._prev 8656 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8657 expression.update_positions(token) 8658 return expression 8659 8660 def _build_pipe_cte( 8661 self, 8662 query: exp.Query, 8663 expressions: t.List[exp.Expression], 8664 alias_cte: t.Optional[exp.TableAlias] = None, 8665 ) -> exp.Select: 8666 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8667 if alias_cte: 8668 new_cte = alias_cte 8669 else: 8670 self._pipe_cte_counter += 1 8671 new_cte = f"__tmp{self._pipe_cte_counter}" 8672 8673 with_ = 
query.args.get("with") 8674 ctes = with_.pop() if with_ else None 8675 8676 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8677 if ctes: 8678 new_select.set("with", ctes) 8679 8680 return new_select.with_(new_cte, as_=query, copy=False) 8681 8682 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8683 select = self._parse_select(consume_pipe=False) 8684 if not select: 8685 return query 8686 8687 return self._build_pipe_cte( 8688 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8689 ) 8690 8691 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8692 limit = self._parse_limit() 8693 offset = self._parse_offset() 8694 if limit: 8695 curr_limit = query.args.get("limit", limit) 8696 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8697 query.limit(limit, copy=False) 8698 if offset: 8699 curr_offset = query.args.get("offset") 8700 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8701 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8702 8703 return query 8704 8705 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8706 this = self._parse_assignment() 8707 if self._match_text_seq("GROUP", "AND", advance=False): 8708 return this 8709 8710 this = self._parse_alias(this) 8711 8712 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8713 return self._parse_ordered(lambda: this) 8714 8715 return this 8716 8717 def _parse_pipe_syntax_aggregate_group_order_by( 8718 self, query: exp.Select, group_by_exists: bool = True 8719 ) -> exp.Select: 8720 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8721 aggregates_or_groups, orders = [], [] 8722 for element in expr: 8723 if isinstance(element, exp.Ordered): 8724 this = element.this 8725 if isinstance(this, exp.Alias): 8726 element.set("this", this.args["alias"]) 8727 orders.append(element) 8728 else: 8729 this = element 8730 aggregates_or_groups.append(this) 8731 8732 if group_by_exists: 8733 query.select(*aggregates_or_groups, copy=False).group_by( 8734 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8735 copy=False, 8736 ) 8737 else: 8738 query.select(*aggregates_or_groups, append=False, copy=False) 8739 8740 if orders: 8741 return query.order_by(*orders, append=False, copy=False) 8742 8743 return query 8744 8745 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8746 self._match_text_seq("AGGREGATE") 8747 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8748 8749 if self._match(TokenType.GROUP_BY) or ( 8750 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8751 ): 8752 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8753 8754 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8755 8756 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8757 first_setop = self.parse_set_operation(this=query) 8758 if not first_setop: 8759 return None 8760 8761 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8762 expr = self._parse_paren() 8763 return expr.assert_is(exp.Subquery).unnest() if expr else None 8764 8765 first_setop.this.pop() 8766 8767 setops = [ 8768 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8769 *self._parse_csv(_parse_and_unwrap_query), 8770 ] 8771 8772 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8773 
with_ = query.args.get("with") 8774 ctes = with_.pop() if with_ else None 8775 8776 if isinstance(first_setop, exp.Union): 8777 query = query.union(*setops, copy=False, **first_setop.args) 8778 elif isinstance(first_setop, exp.Except): 8779 query = query.except_(*setops, copy=False, **first_setop.args) 8780 else: 8781 query = query.intersect(*setops, copy=False, **first_setop.args) 8782 8783 query.set("with", ctes) 8784 8785 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8786 8787 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8788 join = self._parse_join() 8789 if not join: 8790 return None 8791 8792 if isinstance(query, exp.Select): 8793 return query.join(join, copy=False) 8794 8795 return query 8796 8797 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8798 pivots = self._parse_pivots() 8799 if not pivots: 8800 return query 8801 8802 from_ = query.args.get("from") 8803 if from_: 8804 from_.this.set("pivots", pivots) 8805 8806 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8807 8808 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8809 self._match_text_seq("EXTEND") 8810 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8811 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8812 8813 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8814 sample = self._parse_table_sample() 8815 8816 with_ = query.args.get("with") 8817 if with_: 8818 with_.expressions[-1].this.set("sample", sample) 8819 else: 8820 query.set("sample", sample) 8821 8822 return query 8823 8824 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8825 if isinstance(query, exp.Subquery): 8826 query = exp.select("*").from_(query, copy=False) 8827 8828 if not query.args.get("from"): 8829 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8830 8831 while self._match(TokenType.PIPE_GT): 8832 start = self._curr 8833 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8834 if not parser: 8835 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8836 # keywords, making it tricky to disambiguate them without lookahead. The approach 8837 # here is to try and parse a set operation and if that fails, then try to parse a 8838 # join operator. If that fails as well, then the operator is not supported. 
8839 parsed_query = self._parse_pipe_syntax_set_operator(query) 8840 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8841 if not parsed_query: 8842 self._retreat(start) 8843 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8844 break 8845 query = parsed_query 8846 else: 8847 query = parser(self, query) 8848 8849 return query 8850 8851 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8852 vars = self._parse_csv(self._parse_id_var) 8853 if not vars: 8854 return None 8855 8856 return self.expression( 8857 exp.DeclareItem, 8858 this=vars, 8859 kind=self._parse_types(), 8860 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8861 ) 8862 8863 def _parse_declare(self) -> exp.Declare | exp.Command: 8864 start = self._prev 8865 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8866 8867 if not expressions or self._curr: 8868 return self._parse_as_command(start) 8869 8870 return self.expression(exp.Declare, expressions=expressions) 8871 8872 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8873 exp_class = exp.Cast if strict else exp.TryCast 8874 8875 if exp_class == exp.TryCast: 8876 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8877 8878 return self.expression(exp_class, **kwargs) 8879 8880 def _parse_json_value(self) -> exp.JSONValue: 8881 this = self._parse_bitwise() 8882 self._match(TokenType.COMMA) 8883 path = self._parse_bitwise() 8884 8885 returning = self._match(TokenType.RETURNING) and self._parse_type() 8886 8887 return self.expression( 8888 exp.JSONValue, 8889 this=this, 8890 path=self.dialect.to_json_path(path), 8891 returning=returning, 8892 on_condition=self._parse_on_condition(), 8893 ) 8894 8895 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8896 def concat_exprs( 8897 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8898 ) -> exp.Expression: 8899 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8900 concat_exprs = [ 8901 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8902 ] 8903 node.set("expressions", concat_exprs) 8904 return node 8905 if len(exprs) == 1: 8906 return exprs[0] 8907 return self.expression(exp.Concat, expressions=args, safe=True) 8908 8909 args = self._parse_csv(self._parse_lambda) 8910 8911 if args: 8912 order = args[-1] if isinstance(args[-1], exp.Order) else None 8913 8914 if order: 8915 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8916 # remove 'expr' from exp.Order and add it back to args 8917 args[-1] = order.this 8918 order.set("this", concat_exprs(order.this, args)) 8919 8920 this = order or concat_exprs(args[0], args) 8921 else: 8922 this = None 8923 8924 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8925 8926 return self.expression(exp.GroupConcat, this=this, separator=separator)
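In practice the parsing methods above are reached through sqlglot's public entry points rather than by instantiating Parser by hand. A minimal sketch of both paths (the DuckDB dialect and the sample statements are arbitrary choices):

import sqlglot
from sqlglot.dialects.dialect import Dialect

# High-level entry point: tokenizes, parses and returns a single expression tree.
ast = sqlglot.parse_one("SELECT a, COUNT(*) FROM t GROUP BY a")

# Lower-level path: a dialect builds its tokenizer and parser explicitly.
dialect = Dialect.get_or_raise("duckdb")
tokens = dialect.tokenize("SELECT 1")
expressions = dialect.parser().parse(tokens, "SELECT 1")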
32def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 33 if len(args) == 1 and args[0].is_star: 34 return exp.StarMap(this=args[0]) 35 36 keys = [] 37 values = [] 38 for i in range(0, len(args), 2): 39 keys.append(args[i]) 40 values.append(args[i + 1]) 41 42 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
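For illustration, build_var_map can be exercised directly; the key/value literals below are arbitrary, and a single star argument would yield a StarMap instead:

from sqlglot import exp
from sqlglot.parser import build_var_map

# Alternating key/value arguments become a VarMap with parallel key/value arrays.
node = build_var_map(
    [exp.Literal.string("a"), exp.Literal.number(1), exp.Literal.string("b"), exp.Literal.number(2)]
)
assert isinstance(node, exp.VarMap)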
50def binary_range_parser( 51 expr_type: t.Type[exp.Expression], reverse_args: bool = False 52) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 53 def _parse_binary_range( 54 self: Parser, this: t.Optional[exp.Expression] 55 ) -> t.Optional[exp.Expression]: 56 expression = self._parse_bitwise() 57 if reverse_args: 58 this, expression = expression, this 59 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 60 61 return _parse_binary_range
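The closures produced here are registered in RANGE_PARSERS further down (e.g. TokenType.LIKE maps to binary_range_parser(exp.Like)), so an infix comparison parses into the corresponding node and any trailing ESCAPE clause is folded in by _parse_escape. A sketch assuming the default dialect:

import sqlglot
from sqlglot import exp

# The LIKE operator in the WHERE clause is parsed by the closure built above.
query = sqlglot.parse_one("SELECT * FROM t WHERE name LIKE 'a%'")
assert query.find(exp.Like) is not None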
64def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 65 # Default argument order is base, expression 66 this = seq_get(args, 0) 67 expression = seq_get(args, 1) 68 69 if expression: 70 if not dialect.LOG_BASE_FIRST: 71 this, expression = expression, this 72 return exp.Log(this=this, expression=expression) 73 74 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
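A sketch of the resulting behaviour, assuming the default dialect (where LOG_BASE_FIRST is true, so the base stays in `this`; a single-argument LOG would instead fall back to exp.Log, or exp.Ln for dialects that set LOG_DEFAULTS_TO_LN):

import sqlglot
from sqlglot import exp

# Two-argument LOG keeps the base as `this` and the operand as `expression`.
log = sqlglot.parse_one("SELECT LOG(2, 8)").find(exp.Log)
assert log is not None and log.this.to_py() == 2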
94def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 95 def _builder(args: t.List, dialect: Dialect) -> E: 96 expression = expr_type( 97 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 98 ) 99 if len(args) > 2 and expr_type is exp.JSONExtract: 100 expression.set("expressions", args[2:]) 101 102 return expression 103 104 return _builder
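A sketch assuming the default dialect, whose to_json_path converts the string literal path into a structured exp.JSONPath before it is attached to the node:

import sqlglot
from sqlglot import exp

# The second argument of JSON_EXTRACT becomes a parsed JSON path expression.
node = sqlglot.parse_one("SELECT JSON_EXTRACT(payload, '$.user.id')").find(exp.JSONExtract)
assert node is not None and isinstance(node.expression, exp.JSONPath)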
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
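The wrapping above preserves operator precedence once MOD is rendered with the % operator; a sketch with the default dialect:

import sqlglot

# The binary operand is parenthesized, so precedence survives the rewrite.
print(sqlglot.transpile("SELECT MOD(a + 1, 7)")[0])  # expected: SELECT (a + 1) % 7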
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
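A direct-call sketch using the base Dialect (which does not set HAS_DISTINCT_ARRAY_CONSTRUCTORS, so bracket_notation is left unset); the literal arguments are arbitrary:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

# bracket_kind records whether ARRAY[...] or ARRAY(...) syntax was used.
arr = build_array_constructor(
    exp.Array, [exp.Literal.number(1), exp.Literal.number(2)], TokenType.L_BRACKET, Dialect()
)
assert isinstance(arr, exp.Array)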
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
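A direct-call sketch with arbitrary values, showing the two-argument path where the arguments are read as (target_tz, timestamp) and the optional default source timezone is injected:

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

# With only two arguments, the configured default source timezone fills source_tz.
node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("created_at")],
    default_source_tz="UTC",
)
assert node.args["source_tz"].name == "UTC"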
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.LOCK, 536 TokenType.MERGE, 537 TokenType.NATURAL, 538 TokenType.NEXT, 539 TokenType.OFFSET, 540 TokenType.OPERATOR, 541 TokenType.ORDINALITY, 542 TokenType.OVERLAPS, 543 TokenType.OVERWRITE, 544 TokenType.PARTITION, 545 TokenType.PERCENT, 546 TokenType.PIVOT, 547 TokenType.PRAGMA, 548 TokenType.PUT, 549 TokenType.RANGE, 550 TokenType.RECURSIVE, 551 TokenType.REFERENCES, 552 TokenType.REFRESH, 553 TokenType.RENAME, 554 TokenType.REPLACE, 555 TokenType.RIGHT, 556 TokenType.ROLLUP, 557 TokenType.ROW, 558 TokenType.ROWS, 559 TokenType.SEMI, 560 TokenType.SET, 561 TokenType.SETTINGS, 562 TokenType.SHOW, 563 TokenType.TEMPORARY, 564 TokenType.TOP, 565 
TokenType.TRUE, 566 TokenType.TRUNCATE, 567 TokenType.UNIQUE, 568 TokenType.UNNEST, 569 TokenType.UNPIVOT, 570 TokenType.UPDATE, 571 TokenType.USE, 572 TokenType.VOLATILE, 573 TokenType.WINDOW, 574 *ALTERABLES, 575 *CREATABLES, 576 *SUBQUERY_PREDICATES, 577 *TYPE_TOKENS, 578 *NO_PAREN_FUNCTIONS, 579 } 580 ID_VAR_TOKENS.remove(TokenType.UNION) 581 582 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 583 TokenType.ANTI, 584 TokenType.ASOF, 585 TokenType.FULL, 586 TokenType.LEFT, 587 TokenType.LOCK, 588 TokenType.NATURAL, 589 TokenType.RIGHT, 590 TokenType.SEMI, 591 TokenType.WINDOW, 592 } 593 594 ALIAS_TOKENS = ID_VAR_TOKENS 595 596 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 597 598 ARRAY_CONSTRUCTORS = { 599 "ARRAY": exp.Array, 600 "LIST": exp.List, 601 } 602 603 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 604 605 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 606 607 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 608 609 FUNC_TOKENS = { 610 TokenType.COLLATE, 611 TokenType.COMMAND, 612 TokenType.CURRENT_DATE, 613 TokenType.CURRENT_DATETIME, 614 TokenType.CURRENT_SCHEMA, 615 TokenType.CURRENT_TIMESTAMP, 616 TokenType.CURRENT_TIME, 617 TokenType.CURRENT_USER, 618 TokenType.FILTER, 619 TokenType.FIRST, 620 TokenType.FORMAT, 621 TokenType.GET, 622 TokenType.GLOB, 623 TokenType.IDENTIFIER, 624 TokenType.INDEX, 625 TokenType.ISNULL, 626 TokenType.ILIKE, 627 TokenType.INSERT, 628 TokenType.LIKE, 629 TokenType.MERGE, 630 TokenType.NEXT, 631 TokenType.OFFSET, 632 TokenType.PRIMARY_KEY, 633 TokenType.RANGE, 634 TokenType.REPLACE, 635 TokenType.RLIKE, 636 TokenType.ROW, 637 TokenType.UNNEST, 638 TokenType.VAR, 639 TokenType.LEFT, 640 TokenType.RIGHT, 641 TokenType.SEQUENCE, 642 TokenType.DATE, 643 TokenType.DATETIME, 644 TokenType.TABLE, 645 TokenType.TIMESTAMP, 646 TokenType.TIMESTAMPTZ, 647 TokenType.TRUNCATE, 648 TokenType.UTC_DATE, 649 TokenType.UTC_TIME, 650 TokenType.UTC_TIMESTAMP, 651 TokenType.WINDOW, 652 TokenType.XOR, 653 *TYPE_TOKENS, 654 *SUBQUERY_PREDICATES, 655 } 656 657 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 658 TokenType.AND: exp.And, 659 } 660 661 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 662 TokenType.COLON_EQ: exp.PropertyEQ, 663 } 664 665 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 666 TokenType.OR: exp.Or, 667 } 668 669 EQUALITY = { 670 TokenType.EQ: exp.EQ, 671 TokenType.NEQ: exp.NEQ, 672 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 673 } 674 675 COMPARISON = { 676 TokenType.GT: exp.GT, 677 TokenType.GTE: exp.GTE, 678 TokenType.LT: exp.LT, 679 TokenType.LTE: exp.LTE, 680 } 681 682 BITWISE = { 683 TokenType.AMP: exp.BitwiseAnd, 684 TokenType.CARET: exp.BitwiseXor, 685 TokenType.PIPE: exp.BitwiseOr, 686 } 687 688 TERM = { 689 TokenType.DASH: exp.Sub, 690 TokenType.PLUS: exp.Add, 691 TokenType.MOD: exp.Mod, 692 TokenType.COLLATE: exp.Collate, 693 } 694 695 FACTOR = { 696 TokenType.DIV: exp.IntDiv, 697 TokenType.LR_ARROW: exp.Distance, 698 TokenType.SLASH: exp.Div, 699 TokenType.STAR: exp.Mul, 700 } 701 702 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 703 704 TIMES = { 705 TokenType.TIME, 706 TokenType.TIMETZ, 707 } 708 709 TIMESTAMPS = { 710 TokenType.TIMESTAMP, 711 TokenType.TIMESTAMPNTZ, 712 TokenType.TIMESTAMPTZ, 713 TokenType.TIMESTAMPLTZ, 714 *TIMES, 715 } 716 717 SET_OPERATIONS = { 718 TokenType.UNION, 719 TokenType.INTERSECT, 720 TokenType.EXCEPT, 721 } 722 723 JOIN_METHODS = { 724 TokenType.ASOF, 725 TokenType.NATURAL, 726 TokenType.POSITIONAL, 727 } 728 729 JOIN_SIDES = { 730 TokenType.LEFT, 731 
TokenType.RIGHT, 732 TokenType.FULL, 733 } 734 735 JOIN_KINDS = { 736 TokenType.ANTI, 737 TokenType.CROSS, 738 TokenType.INNER, 739 TokenType.OUTER, 740 TokenType.SEMI, 741 TokenType.STRAIGHT_JOIN, 742 } 743 744 JOIN_HINTS: t.Set[str] = set() 745 746 LAMBDAS = { 747 TokenType.ARROW: lambda self, expressions: self.expression( 748 exp.Lambda, 749 this=self._replace_lambda( 750 self._parse_assignment(), 751 expressions, 752 ), 753 expressions=expressions, 754 ), 755 TokenType.FARROW: lambda self, expressions: self.expression( 756 exp.Kwarg, 757 this=exp.var(expressions[0].name), 758 expression=self._parse_assignment(), 759 ), 760 } 761 762 COLUMN_OPERATORS = { 763 TokenType.DOT: None, 764 TokenType.DOTCOLON: lambda self, this, to: self.expression( 765 exp.JSONCast, 766 this=this, 767 to=to, 768 ), 769 TokenType.DCOLON: lambda self, this, to: self.build_cast( 770 strict=self.STRICT_CAST, this=this, to=to 771 ), 772 TokenType.ARROW: lambda self, this, path: self.expression( 773 exp.JSONExtract, 774 this=this, 775 expression=self.dialect.to_json_path(path), 776 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 777 ), 778 TokenType.DARROW: lambda self, this, path: self.expression( 779 exp.JSONExtractScalar, 780 this=this, 781 expression=self.dialect.to_json_path(path), 782 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 783 ), 784 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 785 exp.JSONBExtract, 786 this=this, 787 expression=path, 788 ), 789 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 790 exp.JSONBExtractScalar, 791 this=this, 792 expression=path, 793 ), 794 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 795 exp.JSONBContains, 796 this=this, 797 expression=key, 798 ), 799 } 800 801 CAST_COLUMN_OPERATORS = { 802 TokenType.DOTCOLON, 803 TokenType.DCOLON, 804 } 805 806 EXPRESSION_PARSERS = { 807 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 808 exp.Column: lambda self: self._parse_column(), 809 exp.Condition: lambda self: self._parse_assignment(), 810 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 811 exp.Expression: lambda self: self._parse_expression(), 812 exp.From: lambda self: self._parse_from(joins=True), 813 exp.Group: lambda self: self._parse_group(), 814 exp.Having: lambda self: self._parse_having(), 815 exp.Hint: lambda self: self._parse_hint_body(), 816 exp.Identifier: lambda self: self._parse_id_var(), 817 exp.Join: lambda self: self._parse_join(), 818 exp.Lambda: lambda self: self._parse_lambda(), 819 exp.Lateral: lambda self: self._parse_lateral(), 820 exp.Limit: lambda self: self._parse_limit(), 821 exp.Offset: lambda self: self._parse_offset(), 822 exp.Order: lambda self: self._parse_order(), 823 exp.Ordered: lambda self: self._parse_ordered(), 824 exp.Properties: lambda self: self._parse_properties(), 825 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 826 exp.Qualify: lambda self: self._parse_qualify(), 827 exp.Returning: lambda self: self._parse_returning(), 828 exp.Select: lambda self: self._parse_select(), 829 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 830 exp.Table: lambda self: self._parse_table_parts(), 831 exp.TableAlias: lambda self: self._parse_table_alias(), 832 exp.Tuple: lambda self: self._parse_value(values=False), 833 exp.Whens: lambda self: self._parse_when_matched(), 834 exp.Where: lambda self: self._parse_where(), 835 exp.Window: lambda self: self._parse_named_window(), 836 exp.With: 
lambda self: self._parse_with(), 837 "JOIN_TYPE": lambda self: self._parse_join_parts(), 838 } 839 840 STATEMENT_PARSERS = { 841 TokenType.ALTER: lambda self: self._parse_alter(), 842 TokenType.ANALYZE: lambda self: self._parse_analyze(), 843 TokenType.BEGIN: lambda self: self._parse_transaction(), 844 TokenType.CACHE: lambda self: self._parse_cache(), 845 TokenType.COMMENT: lambda self: self._parse_comment(), 846 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 847 TokenType.COPY: lambda self: self._parse_copy(), 848 TokenType.CREATE: lambda self: self._parse_create(), 849 TokenType.DELETE: lambda self: self._parse_delete(), 850 TokenType.DESC: lambda self: self._parse_describe(), 851 TokenType.DESCRIBE: lambda self: self._parse_describe(), 852 TokenType.DROP: lambda self: self._parse_drop(), 853 TokenType.GRANT: lambda self: self._parse_grant(), 854 TokenType.REVOKE: lambda self: self._parse_revoke(), 855 TokenType.INSERT: lambda self: self._parse_insert(), 856 TokenType.KILL: lambda self: self._parse_kill(), 857 TokenType.LOAD: lambda self: self._parse_load(), 858 TokenType.MERGE: lambda self: self._parse_merge(), 859 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 860 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 861 TokenType.REFRESH: lambda self: self._parse_refresh(), 862 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 863 TokenType.SET: lambda self: self._parse_set(), 864 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 865 TokenType.UNCACHE: lambda self: self._parse_uncache(), 866 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 867 TokenType.UPDATE: lambda self: self._parse_update(), 868 TokenType.USE: lambda self: self._parse_use(), 869 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 870 } 871 872 UNARY_PARSERS = { 873 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 874 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 875 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 876 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 877 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 878 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 879 } 880 881 STRING_PARSERS = { 882 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 883 exp.RawString, this=token.text 884 ), 885 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 886 exp.National, this=token.text 887 ), 888 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 889 TokenType.STRING: lambda self, token: self.expression( 890 exp.Literal, this=token.text, is_string=True 891 ), 892 TokenType.UNICODE_STRING: lambda self, token: self.expression( 893 exp.UnicodeString, 894 this=token.text, 895 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 896 ), 897 } 898 899 NUMERIC_PARSERS = { 900 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 901 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 902 TokenType.HEX_STRING: lambda self, token: self.expression( 903 exp.HexString, 904 this=token.text, 905 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 906 ), 907 TokenType.NUMBER: lambda self, token: self.expression( 908 
exp.Literal, this=token.text, is_string=False 909 ), 910 } 911 912 PRIMARY_PARSERS = { 913 **STRING_PARSERS, 914 **NUMERIC_PARSERS, 915 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 916 TokenType.NULL: lambda self, _: self.expression(exp.Null), 917 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 918 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 919 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 920 TokenType.STAR: lambda self, _: self._parse_star_ops(), 921 } 922 923 PLACEHOLDER_PARSERS = { 924 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 925 TokenType.PARAMETER: lambda self: self._parse_parameter(), 926 TokenType.COLON: lambda self: ( 927 self.expression(exp.Placeholder, this=self._prev.text) 928 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 929 else None 930 ), 931 } 932 933 RANGE_PARSERS = { 934 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 935 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 936 TokenType.GLOB: binary_range_parser(exp.Glob), 937 TokenType.ILIKE: binary_range_parser(exp.ILike), 938 TokenType.IN: lambda self, this: self._parse_in(this), 939 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 940 TokenType.IS: lambda self, this: self._parse_is(this), 941 TokenType.LIKE: binary_range_parser(exp.Like), 942 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 943 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 944 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 945 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 946 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 947 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 948 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 949 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 950 } 951 952 PIPE_SYNTAX_TRANSFORM_PARSERS = { 953 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 954 "AS": lambda self, query: self._build_pipe_cte( 955 query, [exp.Star()], self._parse_table_alias() 956 ), 957 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 958 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 959 "ORDER BY": lambda self, query: query.order_by( 960 self._parse_order(), append=False, copy=False 961 ), 962 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 963 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 964 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 965 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 966 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 967 } 968 969 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 970 "ALLOWED_VALUES": lambda self: self.expression( 971 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 972 ), 973 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 974 "AUTO": lambda self: self._parse_auto_property(), 975 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 976 "BACKUP": lambda self: self.expression( 977 exp.BackupProperty, this=self._parse_var(any_token=True) 978 ), 979 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 980 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 981 "CHARACTER SET": lambda 
self, **kwargs: self._parse_character_set(**kwargs), 982 "CHECKSUM": lambda self: self._parse_checksum(), 983 "CLUSTER BY": lambda self: self._parse_cluster(), 984 "CLUSTERED": lambda self: self._parse_clustered_by(), 985 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 986 exp.CollateProperty, **kwargs 987 ), 988 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 989 "CONTAINS": lambda self: self._parse_contains_property(), 990 "COPY": lambda self: self._parse_copy_property(), 991 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 992 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 993 "DEFINER": lambda self: self._parse_definer(), 994 "DETERMINISTIC": lambda self: self.expression( 995 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 996 ), 997 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 998 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 999 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 1000 "DISTKEY": lambda self: self._parse_distkey(), 1001 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1002 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1003 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1004 "ENVIRONMENT": lambda self: self.expression( 1005 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1006 ), 1007 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1008 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1009 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1010 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1011 "FREESPACE": lambda self: self._parse_freespace(), 1012 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1013 "HEAP": lambda self: self.expression(exp.HeapProperty), 1014 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1015 "IMMUTABLE": lambda self: self.expression( 1016 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1017 ), 1018 "INHERITS": lambda self: self.expression( 1019 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1020 ), 1021 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1022 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1023 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1024 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1025 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1026 "LIKE": lambda self: self._parse_create_like(), 1027 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1028 "LOCK": lambda self: self._parse_locking(), 1029 "LOCKING": lambda self: self._parse_locking(), 1030 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1031 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1032 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1033 "MODIFIES": lambda self: self._parse_modifies_property(), 1034 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1035 "NO": lambda self: self._parse_no_property(), 1036 "ON": lambda self: self._parse_on_property(), 1037 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1038 "OUTPUT": lambda self: 
self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1039 "PARTITION": lambda self: self._parse_partitioned_of(), 1040 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1041 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1042 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1043 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1044 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1045 "READS": lambda self: self._parse_reads_property(), 1046 "REMOTE": lambda self: self._parse_remote_with_connection(), 1047 "RETURNS": lambda self: self._parse_returns(), 1048 "STRICT": lambda self: self.expression(exp.StrictProperty), 1049 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1050 "ROW": lambda self: self._parse_row(), 1051 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1052 "SAMPLE": lambda self: self.expression( 1053 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1054 ), 1055 "SECURE": lambda self: self.expression(exp.SecureProperty), 1056 "SECURITY": lambda self: self._parse_security(), 1057 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1058 "SETTINGS": lambda self: self._parse_settings_property(), 1059 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1060 "SORTKEY": lambda self: self._parse_sortkey(), 1061 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1062 "STABLE": lambda self: self.expression( 1063 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1064 ), 1065 "STORED": lambda self: self._parse_stored(), 1066 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1067 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1068 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1069 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1070 "TO": lambda self: self._parse_to_table(), 1071 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1072 "TRANSFORM": lambda self: self.expression( 1073 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1074 ), 1075 "TTL": lambda self: self._parse_ttl(), 1076 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1077 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1078 "VOLATILE": lambda self: self._parse_volatile_property(), 1079 "WITH": lambda self: self._parse_with_property(), 1080 } 1081 1082 CONSTRAINT_PARSERS = { 1083 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1084 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1085 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1086 "CHARACTER SET": lambda self: self.expression( 1087 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1088 ), 1089 "CHECK": lambda self: self.expression( 1090 exp.CheckColumnConstraint, 1091 this=self._parse_wrapped(self._parse_assignment), 1092 enforced=self._match_text_seq("ENFORCED"), 1093 ), 1094 "COLLATE": lambda self: self.expression( 1095 exp.CollateColumnConstraint, 1096 this=self._parse_identifier() or self._parse_column(), 1097 ), 1098 "COMMENT": lambda self: self.expression( 1099 exp.CommentColumnConstraint, this=self._parse_string() 1100 ), 1101 "COMPRESS": lambda self: self._parse_compress(), 1102 "CLUSTERED": lambda self: self.expression( 1103 exp.ClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 1104 ), 1105 "NONCLUSTERED": lambda self: self.expression( 1106 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1107 ), 1108 "DEFAULT": lambda self: self.expression( 1109 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1110 ), 1111 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1112 "EPHEMERAL": lambda self: self.expression( 1113 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1114 ), 1115 "EXCLUDE": lambda self: self.expression( 1116 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1117 ), 1118 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1119 "FORMAT": lambda self: self.expression( 1120 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1121 ), 1122 "GENERATED": lambda self: self._parse_generated_as_identity(), 1123 "IDENTITY": lambda self: self._parse_auto_increment(), 1124 "INLINE": lambda self: self._parse_inline(), 1125 "LIKE": lambda self: self._parse_create_like(), 1126 "NOT": lambda self: self._parse_not_constraint(), 1127 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1128 "ON": lambda self: ( 1129 self._match(TokenType.UPDATE) 1130 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1131 ) 1132 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1133 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1134 "PERIOD": lambda self: self._parse_period_for_system_time(), 1135 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1136 "REFERENCES": lambda self: self._parse_references(match=False), 1137 "TITLE": lambda self: self.expression( 1138 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1139 ), 1140 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1141 "UNIQUE": lambda self: self._parse_unique(), 1142 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1143 "WATERMARK": lambda self: self.expression( 1144 exp.WatermarkColumnConstraint, 1145 this=self._match(TokenType.FOR) and self._parse_column(), 1146 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1147 ), 1148 "WITH": lambda self: self.expression( 1149 exp.Properties, expressions=self._parse_wrapped_properties() 1150 ), 1151 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1152 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1153 } 1154 1155 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1156 if not self._match(TokenType.L_PAREN, advance=False): 1157 # Partitioning by bucket or truncate follows the syntax: 1158 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1159 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1160 self._retreat(self._index - 1) 1161 return None 1162 1163 klass = ( 1164 exp.PartitionedByBucket 1165 if self._prev.text.upper() == "BUCKET" 1166 else exp.PartitionByTruncate 1167 ) 1168 1169 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1170 this, expression = seq_get(args, 0), seq_get(args, 1) 1171 1172 if isinstance(this, exp.Literal): 1173 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1174 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1175 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1176 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1177 # 1178 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1179 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1180 this, expression = expression, this 1181 1182 return self.expression(klass, this=this, expression=expression) 1183 1184 ALTER_PARSERS = { 1185 "ADD": lambda self: self._parse_alter_table_add(), 1186 "AS": lambda self: self._parse_select(), 1187 "ALTER": lambda self: self._parse_alter_table_alter(), 1188 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1189 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1190 "DROP": lambda self: self._parse_alter_table_drop(), 1191 "RENAME": lambda self: self._parse_alter_table_rename(), 1192 "SET": lambda self: self._parse_alter_table_set(), 1193 "SWAP": lambda self: self.expression( 1194 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1195 ), 1196 } 1197 1198 ALTER_ALTER_PARSERS = { 1199 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1200 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1201 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1202 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1203 } 1204 1205 SCHEMA_UNNAMED_CONSTRAINTS = { 1206 "CHECK", 1207 "EXCLUDE", 1208 "FOREIGN KEY", 1209 "LIKE", 1210 "PERIOD", 1211 "PRIMARY KEY", 1212 "UNIQUE", 1213 "WATERMARK", 1214 "BUCKET", 1215 "TRUNCATE", 1216 } 1217 1218 NO_PAREN_FUNCTION_PARSERS = { 1219 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1220 "CASE": lambda self: self._parse_case(), 1221 "CONNECT_BY_ROOT": lambda self: self.expression( 1222 exp.ConnectByRoot, this=self._parse_column() 1223 ), 1224 "IF": lambda self: self._parse_if(), 1225 } 1226 1227 INVALID_FUNC_NAME_TOKENS = { 1228 TokenType.IDENTIFIER, 1229 TokenType.STRING, 1230 } 1231 1232 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1233 1234 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1235 1236 FUNCTION_PARSERS = { 1237 **{ 1238 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1239 }, 1240 **{ 1241 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1242 }, 1243 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1244 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1245 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1246 "DECODE": lambda self: self._parse_decode(), 1247 "EXTRACT": lambda self: self._parse_extract(), 1248 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1249 "GAP_FILL": lambda self: self._parse_gap_fill(), 1250 "JSON_OBJECT": lambda self: self._parse_json_object(), 1251 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1252 "JSON_TABLE": lambda self: self._parse_json_table(), 1253 "MATCH": lambda self: self._parse_match_against(), 1254 "NORMALIZE": lambda self: self._parse_normalize(), 1255 "OPENJSON": lambda self: self._parse_open_json(), 1256 "OVERLAY": lambda self: self._parse_overlay(), 1257 "POSITION": lambda self: self._parse_position(), 1258 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1259 "STRING_AGG": lambda self: self._parse_string_agg(), 1260 "SUBSTRING": lambda self: self._parse_substring(), 1261 "TRIM": lambda self: self._parse_trim(), 1262 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1263 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1264 "XMLELEMENT": lambda self: self.expression( 1265 exp.XMLElement, 1266 this=self._match_text_seq("NAME") and self._parse_id_var(), 1267 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1268 ), 1269 "XMLTABLE": lambda self: self._parse_xml_table(), 1270 } 1271 1272 QUERY_MODIFIER_PARSERS = { 1273 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1274 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1275 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1276 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1277 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1278 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1279 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1280 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1281 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1282 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1283 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1284 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1285 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1286 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1287 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1288 TokenType.CLUSTER_BY: lambda self: ( 1289 "cluster", 1290 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1291 ), 1292 TokenType.DISTRIBUTE_BY: lambda self: ( 1293 "distribute", 1294 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1295 ), 1296 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1297 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1298 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1299 } 1300 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1301 1302 SET_PARSERS = { 1303 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1304 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1305 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1306 "TRANSACTION": lambda self: self._parse_set_transaction(), 1307 } 1308 1309 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1310 1311 TYPE_LITERAL_PARSERS = { 1312 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1313 } 1314 1315 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {} 1316 1317 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1318 1319 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1320 1321 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1322 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1323 "ISOLATION": ( 1324 ("LEVEL", "REPEATABLE", "READ"), 1325 ("LEVEL", "READ", "COMMITTED"), 1326 ("LEVEL", "READ", "UNCOMITTED"), 1327 ("LEVEL", "SERIALIZABLE"), 1328 ), 1329 "READ": ("WRITE", "ONLY"), 1330 } 1331 1332 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1333 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1334 ) 1335 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1336 1337 CREATE_SEQUENCE: OPTIONS_TYPE = { 1338 "SCALE": ("EXTEND", "NOEXTEND"), 1339 "SHARD": ("EXTEND", "NOEXTEND"), 1340 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1341 **dict.fromkeys( 1342 ( 1343 "SESSION", 1344 "GLOBAL", 1345 "KEEP", 1346 "NOKEEP", 1347 "ORDER", 1348 "NOORDER", 1349 "NOCACHE", 1350 "CYCLE", 1351 "NOCYCLE", 1352 "NOMINVALUE", 1353 "NOMAXVALUE", 1354 "NOSCALE", 1355 "NOSHARD", 1356 ), 1357 tuple(), 1358 ), 1359 } 1360 1361 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1362 1363 USABLES: OPTIONS_TYPE = dict.fromkeys( 1364 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1365 ) 1366 1367 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1368 1369 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1370 "TYPE": ("EVOLUTION",), 1371 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1372 } 1373 1374 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1375 1376 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1377 1378 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1379 "NOT": ("ENFORCED",), 1380 "MATCH": ( 1381 "FULL", 1382 "PARTIAL", 1383 "SIMPLE", 1384 ), 1385 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1386 "USING": ( 1387 "BTREE", 1388 "HASH", 1389 ), 1390 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1391 } 1392 1393 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1394 "NO": ("OTHERS",), 1395 "CURRENT": ("ROW",), 1396 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1397 } 1398 1399 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1400 1401 CLONE_KEYWORDS = {"CLONE", "COPY"} 1402 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1403 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1404 1405 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1406 1407 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1408 1409 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1410 1411 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1412 1413 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1414 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1415 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1416 1417 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1418 1419 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1420 1421 ADD_CONSTRAINT_TOKENS = { 1422 TokenType.CONSTRAINT, 1423 TokenType.FOREIGN_KEY, 1424 TokenType.INDEX, 1425 TokenType.KEY, 1426 TokenType.PRIMARY_KEY, 1427 TokenType.UNIQUE, 1428 } 1429 1430 DISTINCT_TOKENS = {TokenType.DISTINCT} 1431 1432 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1433 1434 SELECT_START_TOKENS = {TokenType.L_PAREN, 
TokenType.WITH, TokenType.SELECT} 1435 1436 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1437 1438 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1439 1440 ODBC_DATETIME_LITERALS = { 1441 "d": exp.Date, 1442 "t": exp.Time, 1443 "ts": exp.Timestamp, 1444 } 1445 1446 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1447 1448 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1449 1450 # The style options for the DESCRIBE statement 1451 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1452 1453 # The style options for the ANALYZE statement 1454 ANALYZE_STYLES = { 1455 "BUFFER_USAGE_LIMIT", 1456 "FULL", 1457 "LOCAL", 1458 "NO_WRITE_TO_BINLOG", 1459 "SAMPLE", 1460 "SKIP_LOCKED", 1461 "VERBOSE", 1462 } 1463 1464 ANALYZE_EXPRESSION_PARSERS = { 1465 "ALL": lambda self: self._parse_analyze_columns(), 1466 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1467 "DELETE": lambda self: self._parse_analyze_delete(), 1468 "DROP": lambda self: self._parse_analyze_histogram(), 1469 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1470 "LIST": lambda self: self._parse_analyze_list(), 1471 "PREDICATE": lambda self: self._parse_analyze_columns(), 1472 "UPDATE": lambda self: self._parse_analyze_histogram(), 1473 "VALIDATE": lambda self: self._parse_analyze_validate(), 1474 } 1475 1476 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1477 1478 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1479 1480 OPERATION_MODIFIERS: t.Set[str] = set() 1481 1482 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1483 1484 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1485 1486 STRICT_CAST = True 1487 1488 PREFIXED_PIVOT_COLUMNS = False 1489 IDENTIFY_PIVOT_STRINGS = False 1490 1491 LOG_DEFAULTS_TO_LN = False 1492 1493 # Whether the table sample clause expects CSV syntax 1494 TABLESAMPLE_CSV = False 1495 1496 # The default method used for table sampling 1497 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1498 1499 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1500 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1501 1502 # Whether the TRIM function expects the characters to trim as its first argument 1503 TRIM_PATTERN_FIRST = False 1504 1505 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1506 STRING_ALIASES = False 1507 1508 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1509 MODIFIERS_ATTACHED_TO_SET_OP = True 1510 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1511 1512 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1513 NO_PAREN_IF_COMMANDS = True 1514 1515 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1516 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1517 1518 # Whether the `:` operator is used to extract a value from a VARIANT column 1519 COLON_IS_VARIANT_EXTRACT = False 1520 1521 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1522 # If this is True and '(' is not found, the keyword will be treated as an identifier 1523 VALUES_FOLLOWED_BY_PAREN = True 1524 1525 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1526 SUPPORTS_IMPLICIT_UNNEST = False 1527 1528 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1529 INTERVAL_SPANS = True 1530 1531 # Whether a PARTITION clause can follow a table reference 1532 SUPPORTS_PARTITION_SELECTION = False 1533 1534 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1535 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1536 1537 # Whether the 'AS' keyword is optional in the CTE definition syntax 1538 OPTIONAL_ALIAS_TOKEN_CTE = True 1539 1540 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1541 ALTER_RENAME_REQUIRES_COLUMN = True 1542 1543 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1544 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1545 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1546 # as BigQuery, where all joins have the same precedence. 1547 JOINS_HAVE_EQUAL_PRECEDENCE = False 1548 1549 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1550 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1551 1552 # Whether map literals support arbitrary expressions as keys. 1553 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1554 # When False, keys are typically restricted to identifiers. 1555 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1556 1557 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1558 # is true for Snowflake but not for BigQuery which can also process strings 1559 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1560 1561 # Dialects like Databricks support JOINS without join criteria 1562 # Adding an ON TRUE, makes transpilation semantically correct for other dialects 1563 ADD_JOIN_ON_TRUE = False 1564 1565 # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]' 1566 # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND` 1567 SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False 1568 1569 __slots__ = ( 1570 "error_level", 1571 "error_message_context", 1572 "max_errors", 1573 "dialect", 1574 "sql", 1575 "errors", 1576 "_tokens", 1577 "_index", 1578 "_curr", 1579 "_next", 1580 "_prev", 1581 "_prev_comments", 1582 "_pipe_cte_counter", 1583 ) 1584 1585 # Autofilled 1586 SHOW_TRIE: t.Dict = {} 1587 SET_TRIE: t.Dict = {} 1588 1589 def __init__( 1590 self, 1591 error_level: t.Optional[ErrorLevel] = None, 1592 error_message_context: int = 100, 1593 max_errors: int = 3, 1594 dialect: DialectType = None, 1595 ): 1596 from sqlglot.dialects import Dialect 1597 1598 self.error_level = error_level or ErrorLevel.IMMEDIATE 1599 self.error_message_context = error_message_context 1600 self.max_errors = max_errors 1601 self.dialect = Dialect.get_or_raise(dialect) 1602 self.reset() 1603 1604 def reset(self): 1605 self.sql = "" 1606 self.errors = [] 1607 self._tokens = [] 1608 self._index = 0 1609 self._curr = None 1610 self._next = None 1611 self._prev = None 1612 self._prev_comments = None 1613 self._pipe_cte_counter = 0 1614 1615 def parse( 1616 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1617 ) -> t.List[t.Optional[exp.Expression]]: 1618 """ 1619 Parses a list of tokens and returns a list of syntax trees, one tree 1620 per parsed SQL statement. 1621 1622 Args: 1623 raw_tokens: The list of tokens. 
1624 sql: The original SQL string, used to produce helpful debug messages. 1625 1626 Returns: 1627 The list of the produced syntax trees. 1628 """ 1629 return self._parse( 1630 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1631 ) 1632 1633 def parse_into( 1634 self, 1635 expression_types: exp.IntoType, 1636 raw_tokens: t.List[Token], 1637 sql: t.Optional[str] = None, 1638 ) -> t.List[t.Optional[exp.Expression]]: 1639 """ 1640 Parses a list of tokens into a given Expression type. If a collection of Expression 1641 types is given instead, this method will try to parse the token list into each one 1642 of them, stopping at the first for which the parsing succeeds. 1643 1644 Args: 1645 expression_types: The expression type(s) to try and parse the token list into. 1646 raw_tokens: The list of tokens. 1647 sql: The original SQL string, used to produce helpful debug messages. 1648 1649 Returns: 1650 The target Expression. 1651 """ 1652 errors = [] 1653 for expression_type in ensure_list(expression_types): 1654 parser = self.EXPRESSION_PARSERS.get(expression_type) 1655 if not parser: 1656 raise TypeError(f"No parser registered for {expression_type}") 1657 1658 try: 1659 return self._parse(parser, raw_tokens, sql) 1660 except ParseError as e: 1661 e.errors[0]["into_expression"] = expression_type 1662 errors.append(e) 1663 1664 raise ParseError( 1665 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1666 errors=merge_errors(errors), 1667 ) from errors[-1] 1668 1669 def _parse( 1670 self, 1671 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1672 raw_tokens: t.List[Token], 1673 sql: t.Optional[str] = None, 1674 ) -> t.List[t.Optional[exp.Expression]]: 1675 self.reset() 1676 self.sql = sql or "" 1677 1678 total = len(raw_tokens) 1679 chunks: t.List[t.List[Token]] = [[]] 1680 1681 for i, token in enumerate(raw_tokens): 1682 if token.token_type == TokenType.SEMICOLON: 1683 if token.comments: 1684 chunks.append([token]) 1685 1686 if i < total - 1: 1687 chunks.append([]) 1688 else: 1689 chunks[-1].append(token) 1690 1691 expressions = [] 1692 1693 for tokens in chunks: 1694 self._index = -1 1695 self._tokens = tokens 1696 self._advance() 1697 1698 expressions.append(parse_method(self)) 1699 1700 if self._index < len(self._tokens): 1701 self.raise_error("Invalid expression / Unexpected token") 1702 1703 self.check_errors() 1704 1705 return expressions 1706 1707 def check_errors(self) -> None: 1708 """Logs or raises any found errors, depending on the chosen error level setting.""" 1709 if self.error_level == ErrorLevel.WARN: 1710 for error in self.errors: 1711 logger.error(str(error)) 1712 elif self.error_level == ErrorLevel.RAISE and self.errors: 1713 raise ParseError( 1714 concat_messages(self.errors, self.max_errors), 1715 errors=merge_errors(self.errors), 1716 ) 1717 1718 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1719 """ 1720 Appends an error in the list of recorded errors or raises it, depending on the chosen 1721 error level setting. 1722 """ 1723 token = token or self._curr or self._prev or Token.string("") 1724 start = token.start 1725 end = token.end + 1 1726 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1727 highlight = self.sql[start:end] 1728 end_context = self.sql[end : end + self.error_message_context] 1729 1730 error = ParseError.new( 1731 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1732 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1733 description=message, 1734 line=token.line, 1735 col=token.col, 1736 start_context=start_context, 1737 highlight=highlight, 1738 end_context=end_context, 1739 ) 1740 1741 if self.error_level == ErrorLevel.IMMEDIATE: 1742 raise error 1743 1744 self.errors.append(error) 1745 1746 def expression( 1747 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1748 ) -> E: 1749 """ 1750 Creates a new, validated Expression. 1751 1752 Args: 1753 exp_class: The expression class to instantiate. 1754 comments: An optional list of comments to attach to the expression. 1755 kwargs: The arguments to set for the expression along with their respective values. 1756 1757 Returns: 1758 The target expression. 1759 """ 1760 instance = exp_class(**kwargs) 1761 instance.add_comments(comments) if comments else self._add_comments(instance) 1762 return self.validate_expression(instance) 1763 1764 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1765 if expression and self._prev_comments: 1766 expression.add_comments(self._prev_comments) 1767 self._prev_comments = None 1768 1769 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1770 """ 1771 Validates an Expression, making sure that all its mandatory arguments are set. 1772 1773 Args: 1774 expression: The expression to validate. 1775 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1776 1777 Returns: 1778 The validated expression. 1779 """ 1780 if self.error_level != ErrorLevel.IGNORE: 1781 for error_message in expression.error_messages(args): 1782 self.raise_error(error_message) 1783 1784 return expression 1785 1786 def _find_sql(self, start: Token, end: Token) -> str: 1787 return self.sql[start.start : end.end + 1] 1788 1789 def _is_connected(self) -> bool: 1790 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1791 1792 def _advance(self, times: int = 1) -> None: 1793 self._index += times 1794 self._curr = seq_get(self._tokens, self._index) 1795 self._next = seq_get(self._tokens, self._index + 1) 1796 1797 if self._index > 0: 1798 self._prev = self._tokens[self._index - 1] 1799 self._prev_comments = self._prev.comments 1800 else: 1801 self._prev = None 1802 self._prev_comments = None 1803 1804 def _retreat(self, index: int) -> None: 1805 if index != self._index: 1806 self._advance(index - self._index) 1807 1808 def _warn_unsupported(self) -> None: 1809 if len(self._tokens) <= 1: 1810 return 1811 1812 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1813 # interested in emitting a warning for the one being currently processed. 1814 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1815 1816 logger.warning( 1817 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1818 ) 1819 1820 def _parse_command(self) -> exp.Command: 1821 self._warn_unsupported() 1822 return self.expression( 1823 exp.Command, 1824 comments=self._prev_comments, 1825 this=self._prev.text.upper(), 1826 expression=self._parse_string(), 1827 ) 1828 1829 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1830 """ 1831 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1832 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1833 solve this by setting & resetting the parser state accordingly 1834 """ 1835 index = self._index 1836 error_level = self.error_level 1837 1838 self.error_level = ErrorLevel.IMMEDIATE 1839 try: 1840 this = parse_method() 1841 except ParseError: 1842 this = None 1843 finally: 1844 if not this or retreat: 1845 self._retreat(index) 1846 self.error_level = error_level 1847 1848 return this 1849 1850 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1851 start = self._prev 1852 exists = self._parse_exists() if allow_exists else None 1853 1854 self._match(TokenType.ON) 1855 1856 materialized = self._match_text_seq("MATERIALIZED") 1857 kind = self._match_set(self.CREATABLES) and self._prev 1858 if not kind: 1859 return self._parse_as_command(start) 1860 1861 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1862 this = self._parse_user_defined_function(kind=kind.token_type) 1863 elif kind.token_type == TokenType.TABLE: 1864 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1865 elif kind.token_type == TokenType.COLUMN: 1866 this = self._parse_column() 1867 else: 1868 this = self._parse_id_var() 1869 1870 self._match(TokenType.IS) 1871 1872 return self.expression( 1873 exp.Comment, 1874 this=this, 1875 kind=kind.text, 1876 expression=self._parse_string(), 1877 exists=exists, 1878 materialized=materialized, 1879 ) 1880 1881 def _parse_to_table( 1882 self, 1883 ) -> exp.ToTableProperty: 1884 table = self._parse_table_parts(schema=True) 1885 return self.expression(exp.ToTableProperty, this=table) 1886 1887 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1888 def _parse_ttl(self) -> exp.Expression: 1889 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1890 this = self._parse_bitwise() 1891 1892 if self._match_text_seq("DELETE"): 1893 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1894 if self._match_text_seq("RECOMPRESS"): 1895 return self.expression( 1896 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1897 ) 1898 if self._match_text_seq("TO", "DISK"): 1899 return self.expression( 1900 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1901 ) 1902 if self._match_text_seq("TO", "VOLUME"): 1903 return self.expression( 1904 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1905 ) 1906 1907 return this 1908 1909 expressions = self._parse_csv(_parse_ttl_action) 1910 where = self._parse_where() 1911 group = self._parse_group() 1912 1913 aggregates = None 1914 if group and self._match(TokenType.SET): 1915 aggregates = self._parse_csv(self._parse_set_item) 1916 1917 return self.expression( 1918 exp.MergeTreeTTL, 1919 expressions=expressions, 1920 where=where, 1921 group=group, 1922 aggregates=aggregates, 1923 ) 1924 1925 def _parse_statement(self) -> t.Optional[exp.Expression]: 1926 if self._curr is None: 1927 return None 1928 1929 if self._match_set(self.STATEMENT_PARSERS): 1930 comments = self._prev_comments 1931 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1932 stmt.add_comments(comments, prepend=True) 1933 return stmt 1934 1935 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1936 return self._parse_command() 1937 1938 expression = self._parse_expression() 1939 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1940 return 
self._parse_query_modifiers(expression) 1941 1942 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1943 start = self._prev 1944 temporary = self._match(TokenType.TEMPORARY) 1945 materialized = self._match_text_seq("MATERIALIZED") 1946 1947 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1948 if not kind: 1949 return self._parse_as_command(start) 1950 1951 concurrently = self._match_text_seq("CONCURRENTLY") 1952 if_exists = exists or self._parse_exists() 1953 1954 if kind == "COLUMN": 1955 this = self._parse_column() 1956 else: 1957 this = self._parse_table_parts( 1958 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1959 ) 1960 1961 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1962 1963 if self._match(TokenType.L_PAREN, advance=False): 1964 expressions = self._parse_wrapped_csv(self._parse_types) 1965 else: 1966 expressions = None 1967 1968 return self.expression( 1969 exp.Drop, 1970 exists=if_exists, 1971 this=this, 1972 expressions=expressions, 1973 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1974 temporary=temporary, 1975 materialized=materialized, 1976 cascade=self._match_text_seq("CASCADE"), 1977 constraints=self._match_text_seq("CONSTRAINTS"), 1978 purge=self._match_text_seq("PURGE"), 1979 cluster=cluster, 1980 concurrently=concurrently, 1981 ) 1982 1983 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1984 return ( 1985 self._match_text_seq("IF") 1986 and (not not_ or self._match(TokenType.NOT)) 1987 and self._match(TokenType.EXISTS) 1988 ) 1989 1990 def _parse_create(self) -> exp.Create | exp.Command: 1991 # Note: this can't be None because we've matched a statement parser 1992 start = self._prev 1993 1994 replace = ( 1995 start.token_type == TokenType.REPLACE 1996 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1997 or self._match_pair(TokenType.OR, TokenType.ALTER) 1998 ) 1999 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2000 2001 unique = self._match(TokenType.UNIQUE) 2002 2003 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2004 clustered = True 2005 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2006 "COLUMNSTORE" 2007 ): 2008 clustered = False 2009 else: 2010 clustered = None 2011 2012 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2013 self._advance() 2014 2015 properties = None 2016 create_token = self._match_set(self.CREATABLES) and self._prev 2017 2018 if not create_token: 2019 # exp.Properties.Location.POST_CREATE 2020 properties = self._parse_properties() 2021 create_token = self._match_set(self.CREATABLES) and self._prev 2022 2023 if not properties or not create_token: 2024 return self._parse_as_command(start) 2025 2026 concurrently = self._match_text_seq("CONCURRENTLY") 2027 exists = self._parse_exists(not_=True) 2028 this = None 2029 expression: t.Optional[exp.Expression] = None 2030 indexes = None 2031 no_schema_binding = None 2032 begin = None 2033 end = None 2034 clone = None 2035 2036 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2037 nonlocal properties 2038 if properties and temp_props: 2039 properties.expressions.extend(temp_props.expressions) 2040 elif temp_props: 2041 properties = temp_props 2042 2043 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2044 this = self._parse_user_defined_function(kind=create_token.token_type) 2045 2046 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2047 extend_props(self._parse_properties()) 2048 2049 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2050 extend_props(self._parse_properties()) 2051 2052 if not expression: 2053 if self._match(TokenType.COMMAND): 2054 expression = self._parse_as_command(self._prev) 2055 else: 2056 begin = self._match(TokenType.BEGIN) 2057 return_ = self._match_text_seq("RETURN") 2058 2059 if self._match(TokenType.STRING, advance=False): 2060 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2061 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2062 expression = self._parse_string() 2063 extend_props(self._parse_properties()) 2064 else: 2065 expression = self._parse_user_defined_function_expression() 2066 2067 end = self._match_text_seq("END") 2068 2069 if return_: 2070 expression = self.expression(exp.Return, this=expression) 2071 elif create_token.token_type == TokenType.INDEX: 2072 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2073 if not self._match(TokenType.ON): 2074 index = self._parse_id_var() 2075 anonymous = False 2076 else: 2077 index = None 2078 anonymous = True 2079 2080 this = self._parse_index(index=index, anonymous=anonymous) 2081 elif create_token.token_type in self.DB_CREATABLES: 2082 table_parts = self._parse_table_parts( 2083 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2084 ) 2085 2086 # exp.Properties.Location.POST_NAME 2087 self._match(TokenType.COMMA) 2088 extend_props(self._parse_properties(before=True)) 2089 2090 this = self._parse_schema(this=table_parts) 2091 2092 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2093 extend_props(self._parse_properties()) 2094 2095 has_alias = self._match(TokenType.ALIAS) 2096 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2097 # exp.Properties.Location.POST_ALIAS 2098 extend_props(self._parse_properties()) 2099 2100 if create_token.token_type == TokenType.SEQUENCE: 2101 expression = self._parse_types() 2102 props = self._parse_properties() 2103 if props: 2104 sequence_props = exp.SequenceProperties() 2105 options = [] 2106 for prop in props: 2107 if isinstance(prop, exp.SequenceProperties): 2108 for arg, value in prop.args.items(): 2109 if arg == "options": 2110 options.extend(value) 2111 else: 2112 sequence_props.set(arg, value) 2113 prop.pop() 2114 2115 if options: 2116 sequence_props.set("options", options) 2117 2118 props.append("expressions", sequence_props) 2119 extend_props(props) 2120 else: 2121 expression = self._parse_ddl_select() 2122 2123 # Some dialects also support using a table as an alias instead of a SELECT. 2124 # Here we fallback to this as an alternative. 
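# Illustrative example (assumed Teradata-style syntax, for clarity only):
#   CREATE TABLE new_t AS existing_t WITH DATA
# Here the token after AS is a bare table reference, so _parse_ddl_select()
# yields nothing and we retry the same position with _parse_table_parts().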
2125 if not expression and has_alias: 2126 expression = self._try_parse(self._parse_table_parts) 2127 2128 if create_token.token_type == TokenType.TABLE: 2129 # exp.Properties.Location.POST_EXPRESSION 2130 extend_props(self._parse_properties()) 2131 2132 indexes = [] 2133 while True: 2134 index = self._parse_index() 2135 2136 # exp.Properties.Location.POST_INDEX 2137 extend_props(self._parse_properties()) 2138 if not index: 2139 break 2140 else: 2141 self._match(TokenType.COMMA) 2142 indexes.append(index) 2143 elif create_token.token_type == TokenType.VIEW: 2144 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2145 no_schema_binding = True 2146 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2147 extend_props(self._parse_properties()) 2148 2149 shallow = self._match_text_seq("SHALLOW") 2150 2151 if self._match_texts(self.CLONE_KEYWORDS): 2152 copy = self._prev.text.lower() == "copy" 2153 clone = self.expression( 2154 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2155 ) 2156 2157 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2158 return self._parse_as_command(start) 2159 2160 create_kind_text = create_token.text.upper() 2161 return self.expression( 2162 exp.Create, 2163 this=this, 2164 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2165 replace=replace, 2166 refresh=refresh, 2167 unique=unique, 2168 expression=expression, 2169 exists=exists, 2170 properties=properties, 2171 indexes=indexes, 2172 no_schema_binding=no_schema_binding, 2173 begin=begin, 2174 end=end, 2175 clone=clone, 2176 concurrently=concurrently, 2177 clustered=clustered, 2178 ) 2179 2180 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2181 seq = exp.SequenceProperties() 2182 2183 options = [] 2184 index = self._index 2185 2186 while self._curr: 2187 self._match(TokenType.COMMA) 2188 if self._match_text_seq("INCREMENT"): 2189 self._match_text_seq("BY") 2190 self._match_text_seq("=") 2191 seq.set("increment", self._parse_term()) 2192 elif self._match_text_seq("MINVALUE"): 2193 seq.set("minvalue", self._parse_term()) 2194 elif self._match_text_seq("MAXVALUE"): 2195 seq.set("maxvalue", self._parse_term()) 2196 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2197 self._match_text_seq("=") 2198 seq.set("start", self._parse_term()) 2199 elif self._match_text_seq("CACHE"): 2200 # T-SQL allows empty CACHE which is initialized dynamically 2201 seq.set("cache", self._parse_number() or True) 2202 elif self._match_text_seq("OWNED", "BY"): 2203 # "OWNED BY NONE" is the default 2204 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2205 else: 2206 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2207 if opt: 2208 options.append(opt) 2209 else: 2210 break 2211 2212 seq.set("options", options if options else None) 2213 return None if self._index == index else seq 2214 2215 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2216 # only used for teradata currently 2217 self._match(TokenType.COMMA) 2218 2219 kwargs = { 2220 "no": self._match_text_seq("NO"), 2221 "dual": self._match_text_seq("DUAL"), 2222 "before": self._match_text_seq("BEFORE"), 2223 "default": self._match_text_seq("DEFAULT"), 2224 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2225 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2226 "after": self._match_text_seq("AFTER"), 2227 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2228 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2229 } 2230 2231 if self._match_texts(self.PROPERTY_PARSERS): 2232 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2233 try: 2234 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2235 except TypeError: 2236 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2237 2238 return None 2239 2240 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2241 return self._parse_wrapped_csv(self._parse_property) 2242 2243 def _parse_property(self) -> t.Optional[exp.Expression]: 2244 if self._match_texts(self.PROPERTY_PARSERS): 2245 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2246 2247 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2248 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2249 2250 if self._match_text_seq("COMPOUND", "SORTKEY"): 2251 return self._parse_sortkey(compound=True) 2252 2253 if self._match_text_seq("SQL", "SECURITY"): 2254 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2255 2256 index = self._index 2257 2258 seq_props = self._parse_sequence_properties() 2259 if seq_props: 2260 return seq_props 2261 2262 self._retreat(index) 2263 key = self._parse_column() 2264 2265 if not self._match(TokenType.EQ): 2266 self._retreat(index) 2267 return None 2268 2269 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2270 if isinstance(key, exp.Column): 2271 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2272 2273 value = self._parse_bitwise() or self._parse_var(any_token=True) 2274 2275 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2276 if isinstance(value, exp.Column): 2277 value = exp.var(value.name) 2278 2279 return self.expression(exp.Property, this=key, value=value) 2280 2281 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2282 if self._match_text_seq("BY"): 2283 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2284 2285 self._match(TokenType.ALIAS) 2286 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2287 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2288 2289 return self.expression( 2290 exp.FileFormatProperty, 2291 this=( 2292 self.expression( 2293 exp.InputOutputFormat, 2294 input_format=input_format, 2295 output_format=output_format, 2296 ) 2297 if input_format or output_format 2298 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2299 ), 2300 hive_format=True, 2301 ) 2302 2303 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2304 field = self._parse_field() 2305 if isinstance(field, exp.Identifier) and not field.quoted: 2306 field = exp.var(field) 2307 2308 return field 2309 2310 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2311 self._match(TokenType.EQ) 2312 self._match(TokenType.ALIAS) 2313 2314 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2315 2316 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2317 properties = [] 2318 while True: 2319 if before: 2320 prop = self._parse_property_before() 2321 else: 2322 prop = self._parse_property() 2323 if not prop: 2324 break 2325 for p in ensure_list(prop): 2326 properties.append(p) 
2327 2328 if properties: 2329 return self.expression(exp.Properties, expressions=properties) 2330 2331 return None 2332 2333 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2334 return self.expression( 2335 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2336 ) 2337 2338 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2339 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2340 security_specifier = self._prev.text.upper() 2341 return self.expression(exp.SecurityProperty, this=security_specifier) 2342 return None 2343 2344 def _parse_settings_property(self) -> exp.SettingsProperty: 2345 return self.expression( 2346 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2347 ) 2348 2349 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2350 if self._index >= 2: 2351 pre_volatile_token = self._tokens[self._index - 2] 2352 else: 2353 pre_volatile_token = None 2354 2355 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2356 return exp.VolatileProperty() 2357 2358 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2359 2360 def _parse_retention_period(self) -> exp.Var: 2361 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2362 number = self._parse_number() 2363 number_str = f"{number} " if number else "" 2364 unit = self._parse_var(any_token=True) 2365 return exp.var(f"{number_str}{unit}") 2366 2367 def _parse_system_versioning_property( 2368 self, with_: bool = False 2369 ) -> exp.WithSystemVersioningProperty: 2370 self._match(TokenType.EQ) 2371 prop = self.expression( 2372 exp.WithSystemVersioningProperty, 2373 **{ # type: ignore 2374 "on": True, 2375 "with": with_, 2376 }, 2377 ) 2378 2379 if self._match_text_seq("OFF"): 2380 prop.set("on", False) 2381 return prop 2382 2383 self._match(TokenType.ON) 2384 if self._match(TokenType.L_PAREN): 2385 while self._curr and not self._match(TokenType.R_PAREN): 2386 if self._match_text_seq("HISTORY_TABLE", "="): 2387 prop.set("this", self._parse_table_parts()) 2388 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2389 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2390 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2391 prop.set("retention_period", self._parse_retention_period()) 2392 2393 self._match(TokenType.COMMA) 2394 2395 return prop 2396 2397 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2398 self._match(TokenType.EQ) 2399 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2400 prop = self.expression(exp.DataDeletionProperty, on=on) 2401 2402 if self._match(TokenType.L_PAREN): 2403 while self._curr and not self._match(TokenType.R_PAREN): 2404 if self._match_text_seq("FILTER_COLUMN", "="): 2405 prop.set("filter_column", self._parse_column()) 2406 elif self._match_text_seq("RETENTION_PERIOD", "="): 2407 prop.set("retention_period", self._parse_retention_period()) 2408 2409 self._match(TokenType.COMMA) 2410 2411 return prop 2412 2413 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2414 kind = "HASH" 2415 expressions: t.Optional[t.List[exp.Expression]] = None 2416 if self._match_text_seq("BY", "HASH"): 2417 expressions = self._parse_wrapped_csv(self._parse_id_var) 2418 elif self._match_text_seq("BY", "RANDOM"): 2419 kind = "RANDOM" 2420 2421 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2422 
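# Illustrative examples (assumed Doris/StarRocks-style DISTRIBUTED BY syntax):
#   DISTRIBUTED BY HASH(user_id) BUCKETS 16    -> kind "HASH", buckets 16
#   DISTRIBUTED BY HASH(user_id) BUCKETS AUTO  -> kind "HASH", buckets left unset
#   DISTRIBUTED BY RANDOM                      -> kind "RANDOM"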
buckets: t.Optional[exp.Expression] = None 2423 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2424 buckets = self._parse_number() 2425 2426 return self.expression( 2427 exp.DistributedByProperty, 2428 expressions=expressions, 2429 kind=kind, 2430 buckets=buckets, 2431 order=self._parse_order(), 2432 ) 2433 2434 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2435 self._match_text_seq("KEY") 2436 expressions = self._parse_wrapped_id_vars() 2437 return self.expression(expr_type, expressions=expressions) 2438 2439 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2440 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2441 prop = self._parse_system_versioning_property(with_=True) 2442 self._match_r_paren() 2443 return prop 2444 2445 if self._match(TokenType.L_PAREN, advance=False): 2446 return self._parse_wrapped_properties() 2447 2448 if self._match_text_seq("JOURNAL"): 2449 return self._parse_withjournaltable() 2450 2451 if self._match_texts(self.VIEW_ATTRIBUTES): 2452 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2453 2454 if self._match_text_seq("DATA"): 2455 return self._parse_withdata(no=False) 2456 elif self._match_text_seq("NO", "DATA"): 2457 return self._parse_withdata(no=True) 2458 2459 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2460 return self._parse_serde_properties(with_=True) 2461 2462 if self._match(TokenType.SCHEMA): 2463 return self.expression( 2464 exp.WithSchemaBindingProperty, 2465 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2466 ) 2467 2468 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2469 return self.expression( 2470 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2471 ) 2472 2473 if not self._next: 2474 return None 2475 2476 return self._parse_withisolatedloading() 2477 2478 def _parse_procedure_option(self) -> exp.Expression | None: 2479 if self._match_text_seq("EXECUTE", "AS"): 2480 return self.expression( 2481 exp.ExecuteAsProperty, 2482 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2483 or self._parse_string(), 2484 ) 2485 2486 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2487 2488 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2489 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2490 self._match(TokenType.EQ) 2491 2492 user = self._parse_id_var() 2493 self._match(TokenType.PARAMETER) 2494 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2495 2496 if not user or not host: 2497 return None 2498 2499 return exp.DefinerProperty(this=f"{user}@{host}") 2500 2501 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2502 self._match(TokenType.TABLE) 2503 self._match(TokenType.EQ) 2504 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2505 2506 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2507 return self.expression(exp.LogProperty, no=no) 2508 2509 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2510 return self.expression(exp.JournalProperty, **kwargs) 2511 2512 def _parse_checksum(self) -> exp.ChecksumProperty: 2513 self._match(TokenType.EQ) 2514 2515 on = None 2516 if self._match(TokenType.ON): 2517 on = True 2518 elif self._match_text_seq("OFF"): 2519 on = False 2520 2521 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2522 2523 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2524 return self.expression( 2525 exp.Cluster, 2526 expressions=( 2527 self._parse_wrapped_csv(self._parse_ordered) 2528 if wrapped 2529 else self._parse_csv(self._parse_ordered) 2530 ), 2531 ) 2532 2533 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2534 self._match_text_seq("BY") 2535 2536 self._match_l_paren() 2537 expressions = self._parse_csv(self._parse_column) 2538 self._match_r_paren() 2539 2540 if self._match_text_seq("SORTED", "BY"): 2541 self._match_l_paren() 2542 sorted_by = self._parse_csv(self._parse_ordered) 2543 self._match_r_paren() 2544 else: 2545 sorted_by = None 2546 2547 self._match(TokenType.INTO) 2548 buckets = self._parse_number() 2549 self._match_text_seq("BUCKETS") 2550 2551 return self.expression( 2552 exp.ClusteredByProperty, 2553 expressions=expressions, 2554 sorted_by=sorted_by, 2555 buckets=buckets, 2556 ) 2557 2558 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2559 if not self._match_text_seq("GRANTS"): 2560 self._retreat(self._index - 1) 2561 return None 2562 2563 return self.expression(exp.CopyGrantsProperty) 2564 2565 def _parse_freespace(self) -> exp.FreespaceProperty: 2566 self._match(TokenType.EQ) 2567 return self.expression( 2568 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2569 ) 2570 2571 def _parse_mergeblockratio( 2572 self, no: bool = False, default: bool = False 2573 ) -> exp.MergeBlockRatioProperty: 2574 if self._match(TokenType.EQ): 2575 return self.expression( 2576 exp.MergeBlockRatioProperty, 2577 this=self._parse_number(), 2578 percent=self._match(TokenType.PERCENT), 2579 ) 2580 2581 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2582 2583 def _parse_datablocksize( 2584 self, 2585 default: t.Optional[bool] = None, 2586 minimum: t.Optional[bool] = None, 2587 maximum: t.Optional[bool] = None, 2588 ) -> exp.DataBlocksizeProperty: 2589 self._match(TokenType.EQ) 2590 size = self._parse_number() 2591 2592 units = None 2593 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2594 units = self._prev.text 2595 2596 return self.expression( 2597 exp.DataBlocksizeProperty, 2598 size=size, 2599 units=units, 2600 default=default, 2601 minimum=minimum, 2602 maximum=maximum, 2603 ) 2604 2605 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2606 self._match(TokenType.EQ) 2607 always = self._match_text_seq("ALWAYS") 2608 manual = self._match_text_seq("MANUAL") 2609 never = self._match_text_seq("NEVER") 2610 default = self._match_text_seq("DEFAULT") 2611 2612 autotemp = None 2613 if self._match_text_seq("AUTOTEMP"): 2614 autotemp = self._parse_schema() 2615 2616 return self.expression( 2617 exp.BlockCompressionProperty, 2618 always=always, 2619 manual=manual, 2620 never=never, 2621 default=default, 2622 autotemp=autotemp, 2623 ) 2624 2625 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2626 index = self._index 2627 no = self._match_text_seq("NO") 2628 concurrent = self._match_text_seq("CONCURRENT") 2629 2630 if not self._match_text_seq("ISOLATED", "LOADING"): 2631 self._retreat(index) 2632 return None 2633 2634 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2635 return self.expression( 2636 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2637 ) 2638 2639 def _parse_locking(self) -> exp.LockingProperty: 2640 if self._match(TokenType.TABLE): 2641 kind = "TABLE" 2642 elif 
self._match(TokenType.VIEW): 2643 kind = "VIEW" 2644 elif self._match(TokenType.ROW): 2645 kind = "ROW" 2646 elif self._match_text_seq("DATABASE"): 2647 kind = "DATABASE" 2648 else: 2649 kind = None 2650 2651 if kind in ("DATABASE", "TABLE", "VIEW"): 2652 this = self._parse_table_parts() 2653 else: 2654 this = None 2655 2656 if self._match(TokenType.FOR): 2657 for_or_in = "FOR" 2658 elif self._match(TokenType.IN): 2659 for_or_in = "IN" 2660 else: 2661 for_or_in = None 2662 2663 if self._match_text_seq("ACCESS"): 2664 lock_type = "ACCESS" 2665 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2666 lock_type = "EXCLUSIVE" 2667 elif self._match_text_seq("SHARE"): 2668 lock_type = "SHARE" 2669 elif self._match_text_seq("READ"): 2670 lock_type = "READ" 2671 elif self._match_text_seq("WRITE"): 2672 lock_type = "WRITE" 2673 elif self._match_text_seq("CHECKSUM"): 2674 lock_type = "CHECKSUM" 2675 else: 2676 lock_type = None 2677 2678 override = self._match_text_seq("OVERRIDE") 2679 2680 return self.expression( 2681 exp.LockingProperty, 2682 this=this, 2683 kind=kind, 2684 for_or_in=for_or_in, 2685 lock_type=lock_type, 2686 override=override, 2687 ) 2688 2689 def _parse_partition_by(self) -> t.List[exp.Expression]: 2690 if self._match(TokenType.PARTITION_BY): 2691 return self._parse_csv(self._parse_assignment) 2692 return [] 2693 2694 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2695 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2696 if self._match_text_seq("MINVALUE"): 2697 return exp.var("MINVALUE") 2698 if self._match_text_seq("MAXVALUE"): 2699 return exp.var("MAXVALUE") 2700 return self._parse_bitwise() 2701 2702 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2703 expression = None 2704 from_expressions = None 2705 to_expressions = None 2706 2707 if self._match(TokenType.IN): 2708 this = self._parse_wrapped_csv(self._parse_bitwise) 2709 elif self._match(TokenType.FROM): 2710 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2711 self._match_text_seq("TO") 2712 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2713 elif self._match_text_seq("WITH", "(", "MODULUS"): 2714 this = self._parse_number() 2715 self._match_text_seq(",", "REMAINDER") 2716 expression = self._parse_number() 2717 self._match_r_paren() 2718 else: 2719 self.raise_error("Failed to parse partition bound spec.") 2720 2721 return self.expression( 2722 exp.PartitionBoundSpec, 2723 this=this, 2724 expression=expression, 2725 from_expressions=from_expressions, 2726 to_expressions=to_expressions, 2727 ) 2728 2729 # https://www.postgresql.org/docs/current/sql-createtable.html 2730 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2731 if not self._match_text_seq("OF"): 2732 self._retreat(self._index - 1) 2733 return None 2734 2735 this = self._parse_table(schema=True) 2736 2737 if self._match(TokenType.DEFAULT): 2738 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2739 elif self._match_text_seq("FOR", "VALUES"): 2740 expression = self._parse_partition_bound_spec() 2741 else: 2742 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2743 2744 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2745 2746 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2747 self._match(TokenType.EQ) 2748 return self.expression( 2749 exp.PartitionedByProperty, 2750 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2751 ) 2752 2753 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2754 if self._match_text_seq("AND", "STATISTICS"): 2755 statistics = True 2756 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2757 statistics = False 2758 else: 2759 statistics = None 2760 2761 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2762 2763 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2764 if self._match_text_seq("SQL"): 2765 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2766 return None 2767 2768 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2769 if self._match_text_seq("SQL", "DATA"): 2770 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2771 return None 2772 2773 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2774 if self._match_text_seq("PRIMARY", "INDEX"): 2775 return exp.NoPrimaryIndexProperty() 2776 if self._match_text_seq("SQL"): 2777 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2778 return None 2779 2780 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2781 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2782 return exp.OnCommitProperty() 2783 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2784 return exp.OnCommitProperty(delete=True) 2785 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2786 2787 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2788 if self._match_text_seq("SQL", "DATA"): 2789 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2790 return None 2791 2792 def _parse_distkey(self) -> exp.DistKeyProperty: 2793 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2794 2795 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2796 table = self._parse_table(schema=True) 2797 2798 options = [] 2799 while self._match_texts(("INCLUDING", "EXCLUDING")): 2800 this = self._prev.text.upper() 2801 2802 id_var = self._parse_id_var() 2803 if not id_var: 2804 return None 2805 2806 options.append( 2807 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2808 ) 2809 2810 return self.expression(exp.LikeProperty, this=table, expressions=options) 2811 2812 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2813 return self.expression( 2814 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2815 ) 2816 2817 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2818 self._match(TokenType.EQ) 2819 return self.expression( 2820 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2821 ) 2822 2823 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2824 self._match_text_seq("WITH", "CONNECTION") 2825 return self.expression( 2826 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2827 ) 2828 2829 def _parse_returns(self) -> exp.ReturnsProperty: 2830 value: t.Optional[exp.Expression] 2831 null = None 2832 is_table = self._match(TokenType.TABLE) 2833 2834 if is_table: 2835 if self._match(TokenType.LT): 2836 value = self.expression( 2837 exp.Schema, 2838 this="TABLE", 2839 expressions=self._parse_csv(self._parse_struct_types), 2840 ) 2841 if not self._match(TokenType.GT): 2842 self.raise_error("Expecting >") 2843 else: 2844 value = self._parse_schema(exp.var("TABLE")) 2845 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
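# Illustrative example (assumed standard-SQL null-call clause):
#   CREATE FUNCTION f(x INT) RETURNS NULL ON NULL INPUT ...
# declares that the function returns NULL whenever an argument is NULL; only the
# `null` flag is recorded here and no return type is captured.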
2846 null = True 2847 value = None 2848 else: 2849 value = self._parse_types() 2850 2851 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2852 2853 def _parse_describe(self) -> exp.Describe: 2854 kind = self._match_set(self.CREATABLES) and self._prev.text 2855 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2856 if self._match(TokenType.DOT): 2857 style = None 2858 self._retreat(self._index - 2) 2859 2860 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2861 2862 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2863 this = self._parse_statement() 2864 else: 2865 this = self._parse_table(schema=True) 2866 2867 properties = self._parse_properties() 2868 expressions = properties.expressions if properties else None 2869 partition = self._parse_partition() 2870 return self.expression( 2871 exp.Describe, 2872 this=this, 2873 style=style, 2874 kind=kind, 2875 expressions=expressions, 2876 partition=partition, 2877 format=format, 2878 ) 2879 2880 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2881 kind = self._prev.text.upper() 2882 expressions = [] 2883 2884 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2885 if self._match(TokenType.WHEN): 2886 expression = self._parse_disjunction() 2887 self._match(TokenType.THEN) 2888 else: 2889 expression = None 2890 2891 else_ = self._match(TokenType.ELSE) 2892 2893 if not self._match(TokenType.INTO): 2894 return None 2895 2896 return self.expression( 2897 exp.ConditionalInsert, 2898 this=self.expression( 2899 exp.Insert, 2900 this=self._parse_table(schema=True), 2901 expression=self._parse_derived_table_values(), 2902 ), 2903 expression=expression, 2904 else_=else_, 2905 ) 2906 2907 expression = parse_conditional_insert() 2908 while expression is not None: 2909 expressions.append(expression) 2910 expression = parse_conditional_insert() 2911 2912 return self.expression( 2913 exp.MultitableInserts, 2914 kind=kind, 2915 comments=comments, 2916 expressions=expressions, 2917 source=self._parse_table(), 2918 ) 2919 2920 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2921 comments = [] 2922 hint = self._parse_hint() 2923 overwrite = self._match(TokenType.OVERWRITE) 2924 ignore = self._match(TokenType.IGNORE) 2925 local = self._match_text_seq("LOCAL") 2926 alternative = None 2927 is_function = None 2928 2929 if self._match_text_seq("DIRECTORY"): 2930 this: t.Optional[exp.Expression] = self.expression( 2931 exp.Directory, 2932 this=self._parse_var_or_string(), 2933 local=local, 2934 row_format=self._parse_row_format(match_row=True), 2935 ) 2936 else: 2937 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2938 comments += ensure_list(self._prev_comments) 2939 return self._parse_multitable_inserts(comments) 2940 2941 if self._match(TokenType.OR): 2942 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2943 2944 self._match(TokenType.INTO) 2945 comments += ensure_list(self._prev_comments) 2946 self._match(TokenType.TABLE) 2947 is_function = self._match(TokenType.FUNCTION) 2948 2949 this = ( 2950 self._parse_table(schema=True, parse_partition=True) 2951 if not is_function 2952 else self._parse_function() 2953 ) 2954 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2955 this.set("alias", self._parse_table_alias()) 2956 2957 returning = self._parse_returning() 2958 2959 return self.expression( 2960 
exp.Insert, 2961 comments=comments, 2962 hint=hint, 2963 is_function=is_function, 2964 this=this, 2965 stored=self._match_text_seq("STORED") and self._parse_stored(), 2966 by_name=self._match_text_seq("BY", "NAME"), 2967 exists=self._parse_exists(), 2968 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2969 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2970 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2971 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2972 conflict=self._parse_on_conflict(), 2973 returning=returning or self._parse_returning(), 2974 overwrite=overwrite, 2975 alternative=alternative, 2976 ignore=ignore, 2977 source=self._match(TokenType.TABLE) and self._parse_table(), 2978 ) 2979 2980 def _parse_kill(self) -> exp.Kill: 2981 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2982 2983 return self.expression( 2984 exp.Kill, 2985 this=self._parse_primary(), 2986 kind=kind, 2987 ) 2988 2989 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2990 conflict = self._match_text_seq("ON", "CONFLICT") 2991 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2992 2993 if not conflict and not duplicate: 2994 return None 2995 2996 conflict_keys = None 2997 constraint = None 2998 2999 if conflict: 3000 if self._match_text_seq("ON", "CONSTRAINT"): 3001 constraint = self._parse_id_var() 3002 elif self._match(TokenType.L_PAREN): 3003 conflict_keys = self._parse_csv(self._parse_id_var) 3004 self._match_r_paren() 3005 3006 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3007 if self._prev.token_type == TokenType.UPDATE: 3008 self._match(TokenType.SET) 3009 expressions = self._parse_csv(self._parse_equality) 3010 else: 3011 expressions = None 3012 3013 return self.expression( 3014 exp.OnConflict, 3015 duplicate=duplicate, 3016 expressions=expressions, 3017 action=action, 3018 conflict_keys=conflict_keys, 3019 constraint=constraint, 3020 where=self._parse_where(), 3021 ) 3022 3023 def _parse_returning(self) -> t.Optional[exp.Returning]: 3024 if not self._match(TokenType.RETURNING): 3025 return None 3026 return self.expression( 3027 exp.Returning, 3028 expressions=self._parse_csv(self._parse_expression), 3029 into=self._match(TokenType.INTO) and self._parse_table_part(), 3030 ) 3031 3032 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3033 if not self._match(TokenType.FORMAT): 3034 return None 3035 return self._parse_row_format() 3036 3037 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3038 index = self._index 3039 with_ = with_ or self._match_text_seq("WITH") 3040 3041 if not self._match(TokenType.SERDE_PROPERTIES): 3042 self._retreat(index) 3043 return None 3044 return self.expression( 3045 exp.SerdeProperties, 3046 **{ # type: ignore 3047 "expressions": self._parse_wrapped_properties(), 3048 "with": with_, 3049 }, 3050 ) 3051 3052 def _parse_row_format( 3053 self, match_row: bool = False 3054 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3055 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3056 return None 3057 3058 if self._match_text_seq("SERDE"): 3059 this = self._parse_string() 3060 3061 serde_properties = self._parse_serde_properties() 3062 3063 return self.expression( 3064 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3065 ) 3066 3067 self._match_text_seq("DELIMITED") 3068 3069 kwargs = {} 3070 3071 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3072 kwargs["fields"] = self._parse_string() 3073 if self._match_text_seq("ESCAPED", "BY"): 3074 kwargs["escaped"] = self._parse_string() 3075 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3076 kwargs["collection_items"] = self._parse_string() 3077 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3078 kwargs["map_keys"] = self._parse_string() 3079 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3080 kwargs["lines"] = self._parse_string() 3081 if self._match_text_seq("NULL", "DEFINED", "AS"): 3082 kwargs["null"] = self._parse_string() 3083 3084 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3085 3086 def _parse_load(self) -> exp.LoadData | exp.Command: 3087 if self._match_text_seq("DATA"): 3088 local = self._match_text_seq("LOCAL") 3089 self._match_text_seq("INPATH") 3090 inpath = self._parse_string() 3091 overwrite = self._match(TokenType.OVERWRITE) 3092 self._match_pair(TokenType.INTO, TokenType.TABLE) 3093 3094 return self.expression( 3095 exp.LoadData, 3096 this=self._parse_table(schema=True), 3097 local=local, 3098 overwrite=overwrite, 3099 inpath=inpath, 3100 partition=self._parse_partition(), 3101 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3102 serde=self._match_text_seq("SERDE") and self._parse_string(), 3103 ) 3104 return self._parse_as_command(self._prev) 3105 3106 def _parse_delete(self) -> exp.Delete: 3107 # This handles MySQL's "Multiple-Table Syntax" 3108 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3109 tables = None 3110 if not self._match(TokenType.FROM, advance=False): 3111 tables = self._parse_csv(self._parse_table) or None 3112 3113 returning = self._parse_returning() 3114 3115 return self.expression( 3116 exp.Delete, 3117 tables=tables, 3118 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3119 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3120 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3121 where=self._parse_where(), 3122 returning=returning or self._parse_returning(), 3123 limit=self._parse_limit(), 3124 ) 3125 3126 def _parse_update(self) -> exp.Update: 3127 kwargs: t.Dict[str, t.Any] = { 3128 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3129 } 3130 while self._curr: 3131 if self._match(TokenType.SET): 3132 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3133 elif self._match(TokenType.RETURNING, advance=False): 3134 kwargs["returning"] = self._parse_returning() 3135 elif self._match(TokenType.FROM, advance=False): 3136 kwargs["from"] = self._parse_from(joins=True) 3137 elif self._match(TokenType.WHERE, advance=False): 3138 kwargs["where"] = self._parse_where() 3139 elif self._match(TokenType.ORDER_BY, advance=False): 3140 kwargs["order"] = self._parse_order() 3141 elif self._match(TokenType.LIMIT, advance=False): 3142 kwargs["limit"] = self._parse_limit() 3143 else: 3144 break 3145 3146 return self.expression(exp.Update, **kwargs) 3147 3148 def _parse_use(self) -> exp.Use: 3149 return self.expression( 3150 exp.Use, 3151 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3152 this=self._parse_table(schema=False), 3153 ) 3154 3155 def _parse_uncache(self) -> exp.Uncache: 3156 if not self._match(TokenType.TABLE): 3157 self.raise_error("Expecting TABLE after 
UNCACHE") 3158 3159 return self.expression( 3160 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3161 ) 3162 3163 def _parse_cache(self) -> exp.Cache: 3164 lazy = self._match_text_seq("LAZY") 3165 self._match(TokenType.TABLE) 3166 table = self._parse_table(schema=True) 3167 3168 options = [] 3169 if self._match_text_seq("OPTIONS"): 3170 self._match_l_paren() 3171 k = self._parse_string() 3172 self._match(TokenType.EQ) 3173 v = self._parse_string() 3174 options = [k, v] 3175 self._match_r_paren() 3176 3177 self._match(TokenType.ALIAS) 3178 return self.expression( 3179 exp.Cache, 3180 this=table, 3181 lazy=lazy, 3182 options=options, 3183 expression=self._parse_select(nested=True), 3184 ) 3185 3186 def _parse_partition(self) -> t.Optional[exp.Partition]: 3187 if not self._match_texts(self.PARTITION_KEYWORDS): 3188 return None 3189 3190 return self.expression( 3191 exp.Partition, 3192 subpartition=self._prev.text.upper() == "SUBPARTITION", 3193 expressions=self._parse_wrapped_csv(self._parse_assignment), 3194 ) 3195 3196 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3197 def _parse_value_expression() -> t.Optional[exp.Expression]: 3198 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3199 return exp.var(self._prev.text.upper()) 3200 return self._parse_expression() 3201 3202 if self._match(TokenType.L_PAREN): 3203 expressions = self._parse_csv(_parse_value_expression) 3204 self._match_r_paren() 3205 return self.expression(exp.Tuple, expressions=expressions) 3206 3207 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3208 expression = self._parse_expression() 3209 if expression: 3210 return self.expression(exp.Tuple, expressions=[expression]) 3211 return None 3212 3213 def _parse_projections(self) -> t.List[exp.Expression]: 3214 return self._parse_expressions() 3215 3216 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3217 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3218 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3219 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3220 ) 3221 elif self._match(TokenType.FROM): 3222 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3223 # Support parentheses for duckdb FROM-first syntax 3224 select = self._parse_select(from_=from_) 3225 if select: 3226 if not select.args.get("from"): 3227 select.set("from", from_) 3228 this = select 3229 else: 3230 this = exp.select("*").from_(t.cast(exp.From, from_)) 3231 else: 3232 this = ( 3233 self._parse_table(consume_pipe=True) 3234 if table 3235 else self._parse_select(nested=True, parse_set_operation=False) 3236 ) 3237 3238 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3239 # in case a modifier (e.g. 
join) is following 3240 if table and isinstance(this, exp.Values) and this.alias: 3241 alias = this.args["alias"].pop() 3242 this = exp.Table(this=this, alias=alias) 3243 3244 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3245 3246 return this 3247 3248 def _parse_select( 3249 self, 3250 nested: bool = False, 3251 table: bool = False, 3252 parse_subquery_alias: bool = True, 3253 parse_set_operation: bool = True, 3254 consume_pipe: bool = True, 3255 from_: t.Optional[exp.From] = None, 3256 ) -> t.Optional[exp.Expression]: 3257 query = self._parse_select_query( 3258 nested=nested, 3259 table=table, 3260 parse_subquery_alias=parse_subquery_alias, 3261 parse_set_operation=parse_set_operation, 3262 ) 3263 3264 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3265 if not query and from_: 3266 query = exp.select("*").from_(from_) 3267 if isinstance(query, exp.Query): 3268 query = self._parse_pipe_syntax_query(query) 3269 query = query.subquery(copy=False) if query and table else query 3270 3271 return query 3272 3273 def _parse_select_query( 3274 self, 3275 nested: bool = False, 3276 table: bool = False, 3277 parse_subquery_alias: bool = True, 3278 parse_set_operation: bool = True, 3279 ) -> t.Optional[exp.Expression]: 3280 cte = self._parse_with() 3281 3282 if cte: 3283 this = self._parse_statement() 3284 3285 if not this: 3286 self.raise_error("Failed to parse any statement following CTE") 3287 return cte 3288 3289 if "with" in this.arg_types: 3290 this.set("with", cte) 3291 else: 3292 self.raise_error(f"{this.key} does not support CTE") 3293 this = cte 3294 3295 return this 3296 3297 # duckdb supports leading with FROM x 3298 from_ = ( 3299 self._parse_from(consume_pipe=True) 3300 if self._match(TokenType.FROM, advance=False) 3301 else None 3302 ) 3303 3304 if self._match(TokenType.SELECT): 3305 comments = self._prev_comments 3306 3307 hint = self._parse_hint() 3308 3309 if self._next and not self._next.token_type == TokenType.DOT: 3310 all_ = self._match(TokenType.ALL) 3311 distinct = self._match_set(self.DISTINCT_TOKENS) 3312 else: 3313 all_, distinct = None, None 3314 3315 kind = ( 3316 self._match(TokenType.ALIAS) 3317 and self._match_texts(("STRUCT", "VALUE")) 3318 and self._prev.text.upper() 3319 ) 3320 3321 if distinct: 3322 distinct = self.expression( 3323 exp.Distinct, 3324 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3325 ) 3326 3327 if all_ and distinct: 3328 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3329 3330 operation_modifiers = [] 3331 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3332 operation_modifiers.append(exp.var(self._prev.text.upper())) 3333 3334 limit = self._parse_limit(top=True) 3335 projections = self._parse_projections() 3336 3337 this = self.expression( 3338 exp.Select, 3339 kind=kind, 3340 hint=hint, 3341 distinct=distinct, 3342 expressions=projections, 3343 limit=limit, 3344 operation_modifiers=operation_modifiers or None, 3345 ) 3346 this.comments = comments 3347 3348 into = self._parse_into() 3349 if into: 3350 this.set("into", into) 3351 3352 if not from_: 3353 from_ = self._parse_from() 3354 3355 if from_: 3356 this.set("from", from_) 3357 3358 this = self._parse_query_modifiers(this) 3359 elif (table or nested) and self._match(TokenType.L_PAREN): 3360 this = self._parse_wrapped_select(table=table) 3361 3362 # We return early here so that the UNION isn't attached to the subquery by the 3363 # following call to _parse_set_operations, but 
instead becomes the parent node 3364 self._match_r_paren() 3365 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3366 elif self._match(TokenType.VALUES, advance=False): 3367 this = self._parse_derived_table_values() 3368 elif from_: 3369 this = exp.select("*").from_(from_.this, copy=False) 3370 elif self._match(TokenType.SUMMARIZE): 3371 table = self._match(TokenType.TABLE) 3372 this = self._parse_select() or self._parse_string() or self._parse_table() 3373 return self.expression(exp.Summarize, this=this, table=table) 3374 elif self._match(TokenType.DESCRIBE): 3375 this = self._parse_describe() 3376 elif self._match_text_seq("STREAM"): 3377 this = self._parse_function() 3378 if this: 3379 this = self.expression(exp.Stream, this=this) 3380 else: 3381 self._retreat(self._index - 1) 3382 else: 3383 this = None 3384 3385 return self._parse_set_operations(this) if parse_set_operation else this 3386 3387 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3388 self._match_text_seq("SEARCH") 3389 3390 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3391 3392 if not kind: 3393 return None 3394 3395 self._match_text_seq("FIRST", "BY") 3396 3397 return self.expression( 3398 exp.RecursiveWithSearch, 3399 kind=kind, 3400 this=self._parse_id_var(), 3401 expression=self._match_text_seq("SET") and self._parse_id_var(), 3402 using=self._match_text_seq("USING") and self._parse_id_var(), 3403 ) 3404 3405 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3406 if not skip_with_token and not self._match(TokenType.WITH): 3407 return None 3408 3409 comments = self._prev_comments 3410 recursive = self._match(TokenType.RECURSIVE) 3411 3412 last_comments = None 3413 expressions = [] 3414 while True: 3415 cte = self._parse_cte() 3416 if isinstance(cte, exp.CTE): 3417 expressions.append(cte) 3418 if last_comments: 3419 cte.add_comments(last_comments) 3420 3421 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3422 break 3423 else: 3424 self._match(TokenType.WITH) 3425 3426 last_comments = self._prev_comments 3427 3428 return self.expression( 3429 exp.With, 3430 comments=comments, 3431 expressions=expressions, 3432 recursive=recursive, 3433 search=self._parse_recursive_with_search(), 3434 ) 3435 3436 def _parse_cte(self) -> t.Optional[exp.CTE]: 3437 index = self._index 3438 3439 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3440 if not alias or not alias.this: 3441 self.raise_error("Expected CTE to have alias") 3442 3443 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3444 self._retreat(index) 3445 return None 3446 3447 comments = self._prev_comments 3448 3449 if self._match_text_seq("NOT", "MATERIALIZED"): 3450 materialized = False 3451 elif self._match_text_seq("MATERIALIZED"): 3452 materialized = True 3453 else: 3454 materialized = None 3455 3456 cte = self.expression( 3457 exp.CTE, 3458 this=self._parse_wrapped(self._parse_statement), 3459 alias=alias, 3460 materialized=materialized, 3461 comments=comments, 3462 ) 3463 3464 values = cte.this 3465 if isinstance(values, exp.Values): 3466 if values.alias: 3467 cte.set("this", exp.select("*").from_(values)) 3468 else: 3469 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3470 3471 return cte 3472 3473 def _parse_table_alias( 3474 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3475 ) -> t.Optional[exp.TableAlias]: 3476 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3477 # so this section tries to parse the clause version and if it fails, it treats the token 3478 # as an identifier (alias) 3479 if self._can_parse_limit_or_offset(): 3480 return None 3481 3482 any_token = self._match(TokenType.ALIAS) 3483 alias = ( 3484 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3485 or self._parse_string_as_identifier() 3486 ) 3487 3488 index = self._index 3489 if self._match(TokenType.L_PAREN): 3490 columns = self._parse_csv(self._parse_function_parameter) 3491 self._match_r_paren() if columns else self._retreat(index) 3492 else: 3493 columns = None 3494 3495 if not alias and not columns: 3496 return None 3497 3498 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3499 3500 # We bubble up comments from the Identifier to the TableAlias 3501 if isinstance(alias, exp.Identifier): 3502 table_alias.add_comments(alias.pop_comments()) 3503 3504 return table_alias 3505 3506 def _parse_subquery( 3507 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3508 ) -> t.Optional[exp.Subquery]: 3509 if not this: 3510 return None 3511 3512 return self.expression( 3513 exp.Subquery, 3514 this=this, 3515 pivots=self._parse_pivots(), 3516 alias=self._parse_table_alias() if parse_alias else None, 3517 sample=self._parse_table_sample(), 3518 ) 3519 3520 def _implicit_unnests_to_explicit(self, this: E) -> E: 3521 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3522 3523 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3524 for i, join in enumerate(this.args.get("joins") or []): 3525 table = join.this 3526 normalized_table = table.copy() 3527 normalized_table.meta["maybe_column"] = True 3528 normalized_table = _norm(normalized_table, dialect=self.dialect) 3529 3530 if isinstance(table, exp.Table) and not join.args.get("on"): 3531 if normalized_table.parts[0].name in refs: 3532 table_as_column = table.to_column() 3533 unnest = exp.Unnest(expressions=[table_as_column]) 3534 3535 # Table.to_column creates a parent Alias node that we want to convert to 3536 # a TableAlias and attach to the Unnest, so it matches the parser's output 3537 if isinstance(table.args.get("alias"), exp.TableAlias): 3538 table_as_column.replace(table_as_column.this) 3539 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3540 3541 table.replace(unnest) 3542 3543 refs.add(normalized_table.alias_or_name) 3544 3545 return this 3546 3547 def _parse_query_modifiers( 3548 self, this: t.Optional[exp.Expression] 3549 ) -> t.Optional[exp.Expression]: 3550 if isinstance(this, self.MODIFIABLES): 3551 for join in self._parse_joins(): 3552 this.append("joins", join) 3553 for lateral in iter(self._parse_lateral, None): 3554 this.append("laterals", lateral) 3555 3556 while True: 3557 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3558 modifier_token = self._curr 3559 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3560 key, expression = parser(self) 3561 3562 if expression: 3563 if this.args.get(key): 3564 self.raise_error( 3565 f"Found multiple '{modifier_token.text.upper()}' clauses", 3566 token=modifier_token, 3567 ) 3568 3569 this.set(key, expression) 3570 if key == "limit": 3571 offset = expression.args.pop("offset", None) 3572 3573 if offset: 3574 offset = exp.Offset(expression=offset) 3575 this.set("offset", offset) 3576 3577 limit_by_expressions = expression.expressions 3578 
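# Illustrative: for a ClickHouse-style "... ORDER BY ts LIMIT 2 OFFSET 1 BY domain",
# the BY expressions parsed into the Limit node are transferred to the Offset
# node here, so each clause ends up on the node that renders it.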
expression.set("expressions", None) 3579 offset.set("expressions", limit_by_expressions) 3580 continue 3581 break 3582 3583 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3584 this = self._implicit_unnests_to_explicit(this) 3585 3586 return this 3587 3588 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3589 start = self._curr 3590 while self._curr: 3591 self._advance() 3592 3593 end = self._tokens[self._index - 1] 3594 return exp.Hint(expressions=[self._find_sql(start, end)]) 3595 3596 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3597 return self._parse_function_call() 3598 3599 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3600 start_index = self._index 3601 should_fallback_to_string = False 3602 3603 hints = [] 3604 try: 3605 for hint in iter( 3606 lambda: self._parse_csv( 3607 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3608 ), 3609 [], 3610 ): 3611 hints.extend(hint) 3612 except ParseError: 3613 should_fallback_to_string = True 3614 3615 if should_fallback_to_string or self._curr: 3616 self._retreat(start_index) 3617 return self._parse_hint_fallback_to_string() 3618 3619 return self.expression(exp.Hint, expressions=hints) 3620 3621 def _parse_hint(self) -> t.Optional[exp.Hint]: 3622 if self._match(TokenType.HINT) and self._prev_comments: 3623 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3624 3625 return None 3626 3627 def _parse_into(self) -> t.Optional[exp.Into]: 3628 if not self._match(TokenType.INTO): 3629 return None 3630 3631 temp = self._match(TokenType.TEMPORARY) 3632 unlogged = self._match_text_seq("UNLOGGED") 3633 self._match(TokenType.TABLE) 3634 3635 return self.expression( 3636 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3637 ) 3638 3639 def _parse_from( 3640 self, 3641 joins: bool = False, 3642 skip_from_token: bool = False, 3643 consume_pipe: bool = False, 3644 ) -> t.Optional[exp.From]: 3645 if not skip_from_token and not self._match(TokenType.FROM): 3646 return None 3647 3648 return self.expression( 3649 exp.From, 3650 comments=self._prev_comments, 3651 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3652 ) 3653 3654 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3655 return self.expression( 3656 exp.MatchRecognizeMeasure, 3657 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3658 this=self._parse_expression(), 3659 ) 3660 3661 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3662 if not self._match(TokenType.MATCH_RECOGNIZE): 3663 return None 3664 3665 self._match_l_paren() 3666 3667 partition = self._parse_partition_by() 3668 order = self._parse_order() 3669 3670 measures = ( 3671 self._parse_csv(self._parse_match_recognize_measure) 3672 if self._match_text_seq("MEASURES") 3673 else None 3674 ) 3675 3676 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3677 rows = exp.var("ONE ROW PER MATCH") 3678 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3679 text = "ALL ROWS PER MATCH" 3680 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3681 text += " SHOW EMPTY MATCHES" 3682 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3683 text += " OMIT EMPTY MATCHES" 3684 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3685 text += " WITH UNMATCHED ROWS" 3686 rows = exp.var(text) 3687 else: 3688 rows = None 3689 3690 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3691 text = "AFTER 
MATCH SKIP" 3692 if self._match_text_seq("PAST", "LAST", "ROW"): 3693 text += " PAST LAST ROW" 3694 elif self._match_text_seq("TO", "NEXT", "ROW"): 3695 text += " TO NEXT ROW" 3696 elif self._match_text_seq("TO", "FIRST"): 3697 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3698 elif self._match_text_seq("TO", "LAST"): 3699 text += f" TO LAST {self._advance_any().text}" # type: ignore 3700 after = exp.var(text) 3701 else: 3702 after = None 3703 3704 if self._match_text_seq("PATTERN"): 3705 self._match_l_paren() 3706 3707 if not self._curr: 3708 self.raise_error("Expecting )", self._curr) 3709 3710 paren = 1 3711 start = self._curr 3712 3713 while self._curr and paren > 0: 3714 if self._curr.token_type == TokenType.L_PAREN: 3715 paren += 1 3716 if self._curr.token_type == TokenType.R_PAREN: 3717 paren -= 1 3718 3719 end = self._prev 3720 self._advance() 3721 3722 if paren > 0: 3723 self.raise_error("Expecting )", self._curr) 3724 3725 pattern = exp.var(self._find_sql(start, end)) 3726 else: 3727 pattern = None 3728 3729 define = ( 3730 self._parse_csv(self._parse_name_as_expression) 3731 if self._match_text_seq("DEFINE") 3732 else None 3733 ) 3734 3735 self._match_r_paren() 3736 3737 return self.expression( 3738 exp.MatchRecognize, 3739 partition_by=partition, 3740 order=order, 3741 measures=measures, 3742 rows=rows, 3743 after=after, 3744 pattern=pattern, 3745 define=define, 3746 alias=self._parse_table_alias(), 3747 ) 3748 3749 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3750 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3751 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3752 cross_apply = False 3753 3754 if cross_apply is not None: 3755 this = self._parse_select(table=True) 3756 view = None 3757 outer = None 3758 elif self._match(TokenType.LATERAL): 3759 this = self._parse_select(table=True) 3760 view = self._match(TokenType.VIEW) 3761 outer = self._match(TokenType.OUTER) 3762 else: 3763 return None 3764 3765 if not this: 3766 this = ( 3767 self._parse_unnest() 3768 or self._parse_function() 3769 or self._parse_id_var(any_token=False) 3770 ) 3771 3772 while self._match(TokenType.DOT): 3773 this = exp.Dot( 3774 this=this, 3775 expression=self._parse_function() or self._parse_id_var(any_token=False), 3776 ) 3777 3778 ordinality: t.Optional[bool] = None 3779 3780 if view: 3781 table = self._parse_id_var(any_token=False) 3782 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3783 table_alias: t.Optional[exp.TableAlias] = self.expression( 3784 exp.TableAlias, this=table, columns=columns 3785 ) 3786 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3787 # We move the alias from the lateral's child node to the lateral itself 3788 table_alias = this.args["alias"].pop() 3789 else: 3790 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3791 table_alias = self._parse_table_alias() 3792 3793 return self.expression( 3794 exp.Lateral, 3795 this=this, 3796 view=view, 3797 outer=outer, 3798 alias=table_alias, 3799 cross_apply=cross_apply, 3800 ordinality=ordinality, 3801 ) 3802 3803 def _parse_join_parts( 3804 self, 3805 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3806 return ( 3807 self._match_set(self.JOIN_METHODS) and self._prev, 3808 self._match_set(self.JOIN_SIDES) and self._prev, 3809 self._match_set(self.JOIN_KINDS) and self._prev, 3810 ) 3811 3812 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3813 def 
_parse_column_as_identifier() -> t.Optional[exp.Expression]: 3814 this = self._parse_column() 3815 if isinstance(this, exp.Column): 3816 return this.this 3817 return this 3818 3819 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3820 3821 def _parse_join( 3822 self, skip_join_token: bool = False, parse_bracket: bool = False 3823 ) -> t.Optional[exp.Join]: 3824 if self._match(TokenType.COMMA): 3825 table = self._try_parse(self._parse_table) 3826 cross_join = self.expression(exp.Join, this=table) if table else None 3827 3828 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3829 cross_join.set("kind", "CROSS") 3830 3831 return cross_join 3832 3833 index = self._index 3834 method, side, kind = self._parse_join_parts() 3835 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3836 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3837 join_comments = self._prev_comments 3838 3839 if not skip_join_token and not join: 3840 self._retreat(index) 3841 kind = None 3842 method = None 3843 side = None 3844 3845 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3846 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3847 3848 if not skip_join_token and not join and not outer_apply and not cross_apply: 3849 return None 3850 3851 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3852 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3853 kwargs["expressions"] = self._parse_csv( 3854 lambda: self._parse_table(parse_bracket=parse_bracket) 3855 ) 3856 3857 if method: 3858 kwargs["method"] = method.text 3859 if side: 3860 kwargs["side"] = side.text 3861 if kind: 3862 kwargs["kind"] = kind.text 3863 if hint: 3864 kwargs["hint"] = hint 3865 3866 if self._match(TokenType.MATCH_CONDITION): 3867 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3868 3869 if self._match(TokenType.ON): 3870 kwargs["on"] = self._parse_assignment() 3871 elif self._match(TokenType.USING): 3872 kwargs["using"] = self._parse_using_identifiers() 3873 elif ( 3874 not method 3875 and not (outer_apply or cross_apply) 3876 and not isinstance(kwargs["this"], exp.Unnest) 3877 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3878 ): 3879 index = self._index 3880 joins: t.Optional[list] = list(self._parse_joins()) 3881 3882 if joins and self._match(TokenType.ON): 3883 kwargs["on"] = self._parse_assignment() 3884 elif joins and self._match(TokenType.USING): 3885 kwargs["using"] = self._parse_using_identifiers() 3886 else: 3887 joins = None 3888 self._retreat(index) 3889 3890 kwargs["this"].set("joins", joins if joins else None) 3891 3892 kwargs["pivots"] = self._parse_pivots() 3893 3894 comments = [c for token in (method, side, kind) if token for c in token.comments] 3895 comments = (join_comments or []) + comments 3896 3897 if ( 3898 self.ADD_JOIN_ON_TRUE 3899 and not kwargs.get("on") 3900 and not kwargs.get("using") 3901 and not kwargs.get("method") 3902 and kwargs.get("kind") in (None, "INNER", "OUTER") 3903 ): 3904 kwargs["on"] = exp.true() 3905 3906 return self.expression(exp.Join, comments=comments, **kwargs) 3907 3908 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3909 this = self._parse_assignment() 3910 3911 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3912 return this 3913 3914 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3915 return 
self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3916 3917 return this 3918 3919 def _parse_index_params(self) -> exp.IndexParameters: 3920 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3921 3922 if self._match(TokenType.L_PAREN, advance=False): 3923 columns = self._parse_wrapped_csv(self._parse_with_operator) 3924 else: 3925 columns = None 3926 3927 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3928 partition_by = self._parse_partition_by() 3929 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3930 tablespace = ( 3931 self._parse_var(any_token=True) 3932 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3933 else None 3934 ) 3935 where = self._parse_where() 3936 3937 on = self._parse_field() if self._match(TokenType.ON) else None 3938 3939 return self.expression( 3940 exp.IndexParameters, 3941 using=using, 3942 columns=columns, 3943 include=include, 3944 partition_by=partition_by, 3945 where=where, 3946 with_storage=with_storage, 3947 tablespace=tablespace, 3948 on=on, 3949 ) 3950 3951 def _parse_index( 3952 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3953 ) -> t.Optional[exp.Index]: 3954 if index or anonymous: 3955 unique = None 3956 primary = None 3957 amp = None 3958 3959 self._match(TokenType.ON) 3960 self._match(TokenType.TABLE) # hive 3961 table = self._parse_table_parts(schema=True) 3962 else: 3963 unique = self._match(TokenType.UNIQUE) 3964 primary = self._match_text_seq("PRIMARY") 3965 amp = self._match_text_seq("AMP") 3966 3967 if not self._match(TokenType.INDEX): 3968 return None 3969 3970 index = self._parse_id_var() 3971 table = None 3972 3973 params = self._parse_index_params() 3974 3975 return self.expression( 3976 exp.Index, 3977 this=index, 3978 table=table, 3979 unique=unique, 3980 primary=primary, 3981 amp=amp, 3982 params=params, 3983 ) 3984 3985 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3986 hints: t.List[exp.Expression] = [] 3987 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3988 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3989 hints.append( 3990 self.expression( 3991 exp.WithTableHint, 3992 expressions=self._parse_csv( 3993 lambda: self._parse_function() or self._parse_var(any_token=True) 3994 ), 3995 ) 3996 ) 3997 self._match_r_paren() 3998 else: 3999 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 4000 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4001 hint = exp.IndexTableHint(this=self._prev.text.upper()) 4002 4003 self._match_set((TokenType.INDEX, TokenType.KEY)) 4004 if self._match(TokenType.FOR): 4005 hint.set("target", self._advance_any() and self._prev.text.upper()) 4006 4007 hint.set("expressions", self._parse_wrapped_id_vars()) 4008 hints.append(hint) 4009 4010 return hints or None 4011 4012 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4013 return ( 4014 (not schema and self._parse_function(optional_parens=False)) 4015 or self._parse_id_var(any_token=False) 4016 or self._parse_string_as_identifier() 4017 or self._parse_placeholder() 4018 ) 4019 4020 def _parse_table_parts( 4021 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4022 ) -> exp.Table: 4023 catalog = None 4024 db = None 4025 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4026 4027 while self._match(TokenType.DOT): 4028 
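# Illustrative walk-through (assumed input "cat.db.tbl"): the first dot shifts
# "cat" into db and reads "db" as the table, the second promotes them to
# catalog/db with "tbl" as the table, and any additional dotted parts are
# folded into nested exp.Dot nodes by the "if catalog:" branch below.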
if catalog: 4029 # This allows nesting the table in arbitrarily many dot expressions if needed 4030 table = self.expression( 4031 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4032 ) 4033 else: 4034 catalog = db 4035 db = table 4036 # "" used for tsql FROM a..b case 4037 table = self._parse_table_part(schema=schema) or "" 4038 4039 if ( 4040 wildcard 4041 and self._is_connected() 4042 and (isinstance(table, exp.Identifier) or not table) 4043 and self._match(TokenType.STAR) 4044 ): 4045 if isinstance(table, exp.Identifier): 4046 table.args["this"] += "*" 4047 else: 4048 table = exp.Identifier(this="*") 4049 4050 # We bubble up comments from the Identifier to the Table 4051 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4052 4053 if is_db_reference: 4054 catalog = db 4055 db = table 4056 table = None 4057 4058 if not table and not is_db_reference: 4059 self.raise_error(f"Expected table name but got {self._curr}") 4060 if not db and is_db_reference: 4061 self.raise_error(f"Expected database name but got {self._curr}") 4062 4063 table = self.expression( 4064 exp.Table, 4065 comments=comments, 4066 this=table, 4067 db=db, 4068 catalog=catalog, 4069 ) 4070 4071 changes = self._parse_changes() 4072 if changes: 4073 table.set("changes", changes) 4074 4075 at_before = self._parse_historical_data() 4076 if at_before: 4077 table.set("when", at_before) 4078 4079 pivots = self._parse_pivots() 4080 if pivots: 4081 table.set("pivots", pivots) 4082 4083 return table 4084 4085 def _parse_table( 4086 self, 4087 schema: bool = False, 4088 joins: bool = False, 4089 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4090 parse_bracket: bool = False, 4091 is_db_reference: bool = False, 4092 parse_partition: bool = False, 4093 consume_pipe: bool = False, 4094 ) -> t.Optional[exp.Expression]: 4095 lateral = self._parse_lateral() 4096 if lateral: 4097 return lateral 4098 4099 unnest = self._parse_unnest() 4100 if unnest: 4101 return unnest 4102 4103 values = self._parse_derived_table_values() 4104 if values: 4105 return values 4106 4107 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4108 if subquery: 4109 if not subquery.args.get("pivots"): 4110 subquery.set("pivots", self._parse_pivots()) 4111 return subquery 4112 4113 bracket = parse_bracket and self._parse_bracket(None) 4114 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4115 4116 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4117 self._parse_table 4118 ) 4119 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4120 4121 only = self._match(TokenType.ONLY) 4122 4123 this = t.cast( 4124 exp.Expression, 4125 bracket 4126 or rows_from 4127 or self._parse_bracket( 4128 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4129 ), 4130 ) 4131 4132 if only: 4133 this.set("only", only) 4134 4135 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4136 self._match_text_seq("*") 4137 4138 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4139 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4140 this.set("partition", self._parse_partition()) 4141 4142 if schema: 4143 return self._parse_schema(this=this) 4144 4145 version = self._parse_version() 4146 4147 if version: 4148 this.set("version", version) 4149 4150 if self.dialect.ALIAS_POST_TABLESAMPLE: 4151 this.set("sample", self._parse_table_sample()) 4152 
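# Editor note (illustrative): when ALIAS_POST_TABLESAMPLE is set for the
# dialect, input like "FROM t TABLESAMPLE (10 ROWS) s" has its sample parsed
# above, before the alias is read here; otherwise the sample is parsed after
# the alias further down.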
4153 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4154 if alias: 4155 this.set("alias", alias) 4156 4157 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4158 return self.expression( 4159 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4160 ) 4161 4162 this.set("hints", self._parse_table_hints()) 4163 4164 if not this.args.get("pivots"): 4165 this.set("pivots", self._parse_pivots()) 4166 4167 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4168 this.set("sample", self._parse_table_sample()) 4169 4170 if joins: 4171 for join in self._parse_joins(): 4172 this.append("joins", join) 4173 4174 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4175 this.set("ordinality", True) 4176 this.set("alias", self._parse_table_alias()) 4177 4178 return this 4179 4180 def _parse_version(self) -> t.Optional[exp.Version]: 4181 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4182 this = "TIMESTAMP" 4183 elif self._match(TokenType.VERSION_SNAPSHOT): 4184 this = "VERSION" 4185 else: 4186 return None 4187 4188 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4189 kind = self._prev.text.upper() 4190 start = self._parse_bitwise() 4191 self._match_texts(("TO", "AND")) 4192 end = self._parse_bitwise() 4193 expression: t.Optional[exp.Expression] = self.expression( 4194 exp.Tuple, expressions=[start, end] 4195 ) 4196 elif self._match_text_seq("CONTAINED", "IN"): 4197 kind = "CONTAINED IN" 4198 expression = self.expression( 4199 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4200 ) 4201 elif self._match(TokenType.ALL): 4202 kind = "ALL" 4203 expression = None 4204 else: 4205 self._match_text_seq("AS", "OF") 4206 kind = "AS OF" 4207 expression = self._parse_type() 4208 4209 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4210 4211 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4212 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4213 index = self._index 4214 historical_data = None 4215 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4216 this = self._prev.text.upper() 4217 kind = ( 4218 self._match(TokenType.L_PAREN) 4219 and self._match_texts(self.HISTORICAL_DATA_KIND) 4220 and self._prev.text.upper() 4221 ) 4222 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4223 4224 if expression: 4225 self._match_r_paren() 4226 historical_data = self.expression( 4227 exp.HistoricalData, this=this, kind=kind, expression=expression 4228 ) 4229 else: 4230 self._retreat(index) 4231 4232 return historical_data 4233 4234 def _parse_changes(self) -> t.Optional[exp.Changes]: 4235 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4236 return None 4237 4238 information = self._parse_var(any_token=True) 4239 self._match_r_paren() 4240 4241 return self.expression( 4242 exp.Changes, 4243 information=information, 4244 at_before=self._parse_historical_data(), 4245 end=self._parse_historical_data(), 4246 ) 4247 4248 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4249 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4250 return None 4251 4252 self._advance() 4253 4254 expressions = self._parse_wrapped_csv(self._parse_equality) 4255 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4256 4257 alias = self._parse_table_alias() if with_alias else None 4258 4259 if alias: 4260 if self.dialect.UNNEST_COLUMN_ONLY: 4261 if alias.args.get("columns"): 4262 
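# Illustrative: in UNNEST_COLUMN_ONLY dialects (e.g. BigQuery's
# "SELECT x FROM UNNEST([1, 2, 3]) AS x") the alias names the generated
# column rather than a derived table, so an extra column list is rejected.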
self.raise_error("Unexpected extra column alias in unnest.") 4263 4264 alias.set("columns", [alias.this]) 4265 alias.set("this", None) 4266 4267 columns = alias.args.get("columns") or [] 4268 if offset and len(expressions) < len(columns): 4269 offset = columns.pop() 4270 4271 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4272 self._match(TokenType.ALIAS) 4273 offset = self._parse_id_var( 4274 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4275 ) or exp.to_identifier("offset") 4276 4277 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4278 4279 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4280 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4281 if not is_derived and not ( 4282 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4283 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4284 ): 4285 return None 4286 4287 expressions = self._parse_csv(self._parse_value) 4288 alias = self._parse_table_alias() 4289 4290 if is_derived: 4291 self._match_r_paren() 4292 4293 return self.expression( 4294 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4295 ) 4296 4297 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4298 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4299 as_modifier and self._match_text_seq("USING", "SAMPLE") 4300 ): 4301 return None 4302 4303 bucket_numerator = None 4304 bucket_denominator = None 4305 bucket_field = None 4306 percent = None 4307 size = None 4308 seed = None 4309 4310 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4311 matched_l_paren = self._match(TokenType.L_PAREN) 4312 4313 if self.TABLESAMPLE_CSV: 4314 num = None 4315 expressions = self._parse_csv(self._parse_primary) 4316 else: 4317 expressions = None 4318 num = ( 4319 self._parse_factor() 4320 if self._match(TokenType.NUMBER, advance=False) 4321 else self._parse_primary() or self._parse_placeholder() 4322 ) 4323 4324 if self._match_text_seq("BUCKET"): 4325 bucket_numerator = self._parse_number() 4326 self._match_text_seq("OUT", "OF") 4327 bucket_denominator = bucket_denominator = self._parse_number() 4328 self._match(TokenType.ON) 4329 bucket_field = self._parse_field() 4330 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4331 percent = num 4332 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4333 size = num 4334 else: 4335 percent = num 4336 4337 if matched_l_paren: 4338 self._match_r_paren() 4339 4340 if self._match(TokenType.L_PAREN): 4341 method = self._parse_var(upper=True) 4342 seed = self._match(TokenType.COMMA) and self._parse_number() 4343 self._match_r_paren() 4344 elif self._match_texts(("SEED", "REPEATABLE")): 4345 seed = self._parse_wrapped(self._parse_number) 4346 4347 if not method and self.DEFAULT_SAMPLING_METHOD: 4348 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4349 4350 return self.expression( 4351 exp.TableSample, 4352 expressions=expressions, 4353 method=method, 4354 bucket_numerator=bucket_numerator, 4355 bucket_denominator=bucket_denominator, 4356 bucket_field=bucket_field, 4357 percent=percent, 4358 size=size, 4359 seed=seed, 4360 ) 4361 4362 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4363 return list(iter(self._parse_pivot, None)) or None 4364 4365 def _parse_joins(self) -> t.Iterator[exp.Join]: 4366 return iter(self._parse_join, None) 4367 4368 def _parse_unpivot_columns(self) -> 
t.Optional[exp.UnpivotColumns]: 4369 if not self._match(TokenType.INTO): 4370 return None 4371 4372 return self.expression( 4373 exp.UnpivotColumns, 4374 this=self._match_text_seq("NAME") and self._parse_column(), 4375 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4376 ) 4377 4378 # https://duckdb.org/docs/sql/statements/pivot 4379 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4380 def _parse_on() -> t.Optional[exp.Expression]: 4381 this = self._parse_bitwise() 4382 4383 if self._match(TokenType.IN): 4384 # PIVOT ... ON col IN (row_val1, row_val2) 4385 return self._parse_in(this) 4386 if self._match(TokenType.ALIAS, advance=False): 4387 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4388 return self._parse_alias(this) 4389 4390 return this 4391 4392 this = self._parse_table() 4393 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4394 into = self._parse_unpivot_columns() 4395 using = self._match(TokenType.USING) and self._parse_csv( 4396 lambda: self._parse_alias(self._parse_function()) 4397 ) 4398 group = self._parse_group() 4399 4400 return self.expression( 4401 exp.Pivot, 4402 this=this, 4403 expressions=expressions, 4404 using=using, 4405 group=group, 4406 unpivot=is_unpivot, 4407 into=into, 4408 ) 4409 4410 def _parse_pivot_in(self) -> exp.In: 4411 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4412 this = self._parse_select_or_expression() 4413 4414 self._match(TokenType.ALIAS) 4415 alias = self._parse_bitwise() 4416 if alias: 4417 if isinstance(alias, exp.Column) and not alias.db: 4418 alias = alias.this 4419 return self.expression(exp.PivotAlias, this=this, alias=alias) 4420 4421 return this 4422 4423 value = self._parse_column() 4424 4425 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4426 self.raise_error("Expecting IN (") 4427 4428 if self._match(TokenType.ANY): 4429 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4430 else: 4431 exprs = self._parse_csv(_parse_aliased_expression) 4432 4433 self._match_r_paren() 4434 return self.expression(exp.In, this=value, expressions=exprs) 4435 4436 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4437 func = self._parse_function() 4438 if not func: 4439 if self._prev and self._prev.token_type == TokenType.COMMA: 4440 return None 4441 self.raise_error("Expecting an aggregation function in PIVOT") 4442 4443 return self._parse_alias(func) 4444 4445 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4446 index = self._index 4447 include_nulls = None 4448 4449 if self._match(TokenType.PIVOT): 4450 unpivot = False 4451 elif self._match(TokenType.UNPIVOT): 4452 unpivot = True 4453 4454 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4455 if self._match_text_seq("INCLUDE", "NULLS"): 4456 include_nulls = True 4457 elif self._match_text_seq("EXCLUDE", "NULLS"): 4458 include_nulls = False 4459 else: 4460 return None 4461 4462 expressions = [] 4463 4464 if not self._match(TokenType.L_PAREN): 4465 self._retreat(index) 4466 return None 4467 4468 if unpivot: 4469 expressions = self._parse_csv(self._parse_column) 4470 else: 4471 expressions = self._parse_csv(self._parse_pivot_aggregation) 4472 4473 if not expressions: 4474 self.raise_error("Failed to parse PIVOT's aggregation list") 4475 4476 if not self._match(TokenType.FOR): 4477 self.raise_error("Expecting FOR") 4478 4479 fields = [] 4480 while True: 4481 field = 
self._try_parse(self._parse_pivot_in) 4482 if not field: 4483 break 4484 fields.append(field) 4485 4486 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4487 self._parse_bitwise 4488 ) 4489 4490 group = self._parse_group() 4491 4492 self._match_r_paren() 4493 4494 pivot = self.expression( 4495 exp.Pivot, 4496 expressions=expressions, 4497 fields=fields, 4498 unpivot=unpivot, 4499 include_nulls=include_nulls, 4500 default_on_null=default_on_null, 4501 group=group, 4502 ) 4503 4504 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4505 pivot.set("alias", self._parse_table_alias()) 4506 4507 if not unpivot: 4508 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4509 4510 columns: t.List[exp.Expression] = [] 4511 all_fields = [] 4512 for pivot_field in pivot.fields: 4513 pivot_field_expressions = pivot_field.expressions 4514 4515 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4516 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4517 continue 4518 4519 all_fields.append( 4520 [ 4521 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4522 for fld in pivot_field_expressions 4523 ] 4524 ) 4525 4526 if all_fields: 4527 if names: 4528 all_fields.append(names) 4529 4530 # Generate all possible combinations of the pivot columns 4531 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4532 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4533 for fld_parts_tuple in itertools.product(*all_fields): 4534 fld_parts = list(fld_parts_tuple) 4535 4536 if names and self.PREFIXED_PIVOT_COLUMNS: 4537 # Move the "name" to the front of the list 4538 fld_parts.insert(0, fld_parts.pop(-1)) 4539 4540 columns.append(exp.to_identifier("_".join(fld_parts))) 4541 4542 pivot.set("columns", columns) 4543 4544 return pivot 4545 4546 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4547 return [agg.alias for agg in aggregations if agg.alias] 4548 4549 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4550 if not skip_where_token and not self._match(TokenType.PREWHERE): 4551 return None 4552 4553 return self.expression( 4554 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4555 ) 4556 4557 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4558 if not skip_where_token and not self._match(TokenType.WHERE): 4559 return None 4560 4561 return self.expression( 4562 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4563 ) 4564 4565 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4566 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4567 return None 4568 comments = self._prev_comments 4569 4570 elements: t.Dict[str, t.Any] = defaultdict(list) 4571 4572 if self._match(TokenType.ALL): 4573 elements["all"] = True 4574 elif self._match(TokenType.DISTINCT): 4575 elements["all"] = False 4576 4577 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4578 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4579 4580 while True: 4581 index = self._index 4582 4583 elements["expressions"].extend( 4584 self._parse_csv( 4585 lambda: None 4586 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4587 else self._parse_assignment() 4588 ) 4589 ) 4590 4591 
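# Illustrative: the block below accepts both "GROUP BY ROLLUP (a, b)" and the
# MySQL-style suffix "GROUP BY a, b WITH ROLLUP" (similarly for CUBE);
# with_prefix records whether the WITH keyword introduced the modifier.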
before_with_index = self._index 4592 with_prefix = self._match(TokenType.WITH) 4593 4594 if self._match(TokenType.ROLLUP): 4595 elements["rollup"].append( 4596 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4597 ) 4598 elif self._match(TokenType.CUBE): 4599 elements["cube"].append( 4600 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4601 ) 4602 elif self._match(TokenType.GROUPING_SETS): 4603 elements["grouping_sets"].append( 4604 self.expression( 4605 exp.GroupingSets, 4606 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4607 ) 4608 ) 4609 elif self._match_text_seq("TOTALS"): 4610 elements["totals"] = True # type: ignore 4611 4612 if before_with_index <= self._index <= before_with_index + 1: 4613 self._retreat(before_with_index) 4614 break 4615 4616 if index == self._index: 4617 break 4618 4619 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4620 4621 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4622 return self.expression( 4623 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4624 ) 4625 4626 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4627 if self._match(TokenType.L_PAREN): 4628 grouping_set = self._parse_csv(self._parse_bitwise) 4629 self._match_r_paren() 4630 return self.expression(exp.Tuple, expressions=grouping_set) 4631 4632 return self._parse_column() 4633 4634 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4635 if not skip_having_token and not self._match(TokenType.HAVING): 4636 return None 4637 return self.expression( 4638 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4639 ) 4640 4641 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4642 if not self._match(TokenType.QUALIFY): 4643 return None 4644 return self.expression(exp.Qualify, this=self._parse_assignment()) 4645 4646 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4647 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4648 exp.Prior, this=self._parse_bitwise() 4649 ) 4650 connect = self._parse_assignment() 4651 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4652 return connect 4653 4654 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4655 if skip_start_token: 4656 start = None 4657 elif self._match(TokenType.START_WITH): 4658 start = self._parse_assignment() 4659 else: 4660 return None 4661 4662 self._match(TokenType.CONNECT_BY) 4663 nocycle = self._match_text_seq("NOCYCLE") 4664 connect = self._parse_connect_with_prior() 4665 4666 if not start and self._match(TokenType.START_WITH): 4667 start = self._parse_assignment() 4668 4669 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4670 4671 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4672 this = self._parse_id_var(any_token=True) 4673 if self._match(TokenType.ALIAS): 4674 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4675 return this 4676 4677 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4678 if self._match_text_seq("INTERPOLATE"): 4679 return self._parse_wrapped_csv(self._parse_name_as_expression) 4680 return None 4681 4682 def _parse_order( 4683 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4684 ) -> t.Optional[exp.Expression]: 4685 siblings = None 4686 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4687 if not 
self._match(TokenType.ORDER_SIBLINGS_BY): 4688 return this 4689 4690 siblings = True 4691 4692 return self.expression( 4693 exp.Order, 4694 comments=self._prev_comments, 4695 this=this, 4696 expressions=self._parse_csv(self._parse_ordered), 4697 siblings=siblings, 4698 ) 4699 4700 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4701 if not self._match(token): 4702 return None 4703 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4704 4705 def _parse_ordered( 4706 self, parse_method: t.Optional[t.Callable] = None 4707 ) -> t.Optional[exp.Ordered]: 4708 this = parse_method() if parse_method else self._parse_assignment() 4709 if not this: 4710 return None 4711 4712 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4713 this = exp.var("ALL") 4714 4715 asc = self._match(TokenType.ASC) 4716 desc = self._match(TokenType.DESC) or (asc and False) 4717 4718 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4719 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4720 4721 nulls_first = is_nulls_first or False 4722 explicitly_null_ordered = is_nulls_first or is_nulls_last 4723 4724 if ( 4725 not explicitly_null_ordered 4726 and ( 4727 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4728 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4729 ) 4730 and self.dialect.NULL_ORDERING != "nulls_are_last" 4731 ): 4732 nulls_first = True 4733 4734 if self._match_text_seq("WITH", "FILL"): 4735 with_fill = self.expression( 4736 exp.WithFill, 4737 **{ # type: ignore 4738 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4739 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4740 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4741 "interpolate": self._parse_interpolate(), 4742 }, 4743 ) 4744 else: 4745 with_fill = None 4746 4747 return self.expression( 4748 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4749 ) 4750 4751 def _parse_limit_options(self) -> exp.LimitOptions: 4752 percent = self._match(TokenType.PERCENT) 4753 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4754 self._match_text_seq("ONLY") 4755 with_ties = self._match_text_seq("WITH", "TIES") 4756 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4757 4758 def _parse_limit( 4759 self, 4760 this: t.Optional[exp.Expression] = None, 4761 top: bool = False, 4762 skip_limit_token: bool = False, 4763 ) -> t.Optional[exp.Expression]: 4764 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4765 comments = self._prev_comments 4766 if top: 4767 limit_paren = self._match(TokenType.L_PAREN) 4768 expression = self._parse_term() if limit_paren else self._parse_number() 4769 4770 if limit_paren: 4771 self._match_r_paren() 4772 4773 limit_options = self._parse_limit_options() 4774 else: 4775 limit_options = None 4776 expression = self._parse_term() 4777 4778 if self._match(TokenType.COMMA): 4779 offset = expression 4780 expression = self._parse_term() 4781 else: 4782 offset = None 4783 4784 limit_exp = self.expression( 4785 exp.Limit, 4786 this=this, 4787 expression=expression, 4788 offset=offset, 4789 comments=comments, 4790 limit_options=limit_options, 4791 expressions=self._parse_limit_by(), 4792 ) 4793 4794 return limit_exp 4795 4796 if self._match(TokenType.FETCH): 4797 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4798 direction = self._prev.text.upper() if direction else 
"FIRST" 4799 4800 count = self._parse_field(tokens=self.FETCH_TOKENS) 4801 4802 return self.expression( 4803 exp.Fetch, 4804 direction=direction, 4805 count=count, 4806 limit_options=self._parse_limit_options(), 4807 ) 4808 4809 return this 4810 4811 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4812 if not self._match(TokenType.OFFSET): 4813 return this 4814 4815 count = self._parse_term() 4816 self._match_set((TokenType.ROW, TokenType.ROWS)) 4817 4818 return self.expression( 4819 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4820 ) 4821 4822 def _can_parse_limit_or_offset(self) -> bool: 4823 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4824 return False 4825 4826 index = self._index 4827 result = bool( 4828 self._try_parse(self._parse_limit, retreat=True) 4829 or self._try_parse(self._parse_offset, retreat=True) 4830 ) 4831 self._retreat(index) 4832 return result 4833 4834 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4835 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4836 4837 def _parse_locks(self) -> t.List[exp.Lock]: 4838 locks = [] 4839 while True: 4840 update, key = None, None 4841 if self._match_text_seq("FOR", "UPDATE"): 4842 update = True 4843 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4844 "LOCK", "IN", "SHARE", "MODE" 4845 ): 4846 update = False 4847 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4848 update, key = False, True 4849 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4850 update, key = True, True 4851 else: 4852 break 4853 4854 expressions = None 4855 if self._match_text_seq("OF"): 4856 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4857 4858 wait: t.Optional[bool | exp.Expression] = None 4859 if self._match_text_seq("NOWAIT"): 4860 wait = True 4861 elif self._match_text_seq("WAIT"): 4862 wait = self._parse_primary() 4863 elif self._match_text_seq("SKIP", "LOCKED"): 4864 wait = False 4865 4866 locks.append( 4867 self.expression( 4868 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4869 ) 4870 ) 4871 4872 return locks 4873 4874 def parse_set_operation( 4875 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4876 ) -> t.Optional[exp.Expression]: 4877 start = self._index 4878 _, side_token, kind_token = self._parse_join_parts() 4879 4880 side = side_token.text if side_token else None 4881 kind = kind_token.text if kind_token else None 4882 4883 if not self._match_set(self.SET_OPERATIONS): 4884 self._retreat(start) 4885 return None 4886 4887 token_type = self._prev.token_type 4888 4889 if token_type == TokenType.UNION: 4890 operation: t.Type[exp.SetOperation] = exp.Union 4891 elif token_type == TokenType.EXCEPT: 4892 operation = exp.Except 4893 else: 4894 operation = exp.Intersect 4895 4896 comments = self._prev.comments 4897 4898 if self._match(TokenType.DISTINCT): 4899 distinct: t.Optional[bool] = True 4900 elif self._match(TokenType.ALL): 4901 distinct = False 4902 else: 4903 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4904 if distinct is None: 4905 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4906 4907 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4908 "STRICT", "CORRESPONDING" 4909 ) 4910 if self._match_text_seq("CORRESPONDING"): 4911 by_name = True 4912 if not side and not kind: 4913 kind = "INNER" 4914 4915 on_column_list = None 4916 if by_name and 
self._match_texts(("ON", "BY")): 4917 on_column_list = self._parse_wrapped_csv(self._parse_column) 4918 4919 expression = self._parse_select( 4920 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4921 ) 4922 4923 return self.expression( 4924 operation, 4925 comments=comments, 4926 this=this, 4927 distinct=distinct, 4928 by_name=by_name, 4929 expression=expression, 4930 side=side, 4931 kind=kind, 4932 on=on_column_list, 4933 ) 4934 4935 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4936 while this: 4937 setop = self.parse_set_operation(this) 4938 if not setop: 4939 break 4940 this = setop 4941 4942 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4943 expression = this.expression 4944 4945 if expression: 4946 for arg in self.SET_OP_MODIFIERS: 4947 expr = expression.args.get(arg) 4948 if expr: 4949 this.set(arg, expr.pop()) 4950 4951 return this 4952 4953 def _parse_expression(self) -> t.Optional[exp.Expression]: 4954 return self._parse_alias(self._parse_assignment()) 4955 4956 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4957 this = self._parse_disjunction() 4958 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4959 # This allows us to parse <non-identifier token> := <expr> 4960 this = exp.column( 4961 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4962 ) 4963 4964 while self._match_set(self.ASSIGNMENT): 4965 if isinstance(this, exp.Column) and len(this.parts) == 1: 4966 this = this.this 4967 4968 this = self.expression( 4969 self.ASSIGNMENT[self._prev.token_type], 4970 this=this, 4971 comments=self._prev_comments, 4972 expression=self._parse_assignment(), 4973 ) 4974 4975 return this 4976 4977 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4978 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4979 4980 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4981 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4982 4983 def _parse_equality(self) -> t.Optional[exp.Expression]: 4984 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4985 4986 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4987 return self._parse_tokens(self._parse_range, self.COMPARISON) 4988 4989 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4990 this = this or self._parse_bitwise() 4991 negate = self._match(TokenType.NOT) 4992 4993 if self._match_set(self.RANGE_PARSERS): 4994 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4995 if not expression: 4996 return this 4997 4998 this = expression 4999 elif self._match(TokenType.ISNULL): 5000 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5001 5002 # Postgres supports ISNULL and NOTNULL for conditions. 
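# For example (a sketch against the public sqlglot API, assuming ISNULL / NOTNULL tokenize as
# postfix operators in the Postgres dialect; exact node reprs can vary between versions):
#
#     import sqlglot
#     ast = sqlglot.parse_one("SELECT y ISNULL, x NOTNULL FROM t", read="postgres")
#     ast.selects[0]  # exp.Is(this=y, expression=exp.Null())
#     ast.selects[1]  # exp.Not wrapping exp.Is(this=x, expression=exp.Null())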
5003 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5004 if self._match(TokenType.NOTNULL): 5005 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5006 this = self.expression(exp.Not, this=this) 5007 5008 if negate: 5009 this = self._negate_range(this) 5010 5011 if self._match(TokenType.IS): 5012 this = self._parse_is(this) 5013 5014 return this 5015 5016 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5017 if not this: 5018 return this 5019 5020 return self.expression(exp.Not, this=this) 5021 5022 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5023 index = self._index - 1 5024 negate = self._match(TokenType.NOT) 5025 5026 if self._match_text_seq("DISTINCT", "FROM"): 5027 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5028 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5029 5030 if self._match(TokenType.JSON): 5031 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5032 5033 if self._match_text_seq("WITH"): 5034 _with = True 5035 elif self._match_text_seq("WITHOUT"): 5036 _with = False 5037 else: 5038 _with = None 5039 5040 unique = self._match(TokenType.UNIQUE) 5041 self._match_text_seq("KEYS") 5042 expression: t.Optional[exp.Expression] = self.expression( 5043 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5044 ) 5045 else: 5046 expression = self._parse_primary() or self._parse_null() 5047 if not expression: 5048 self._retreat(index) 5049 return None 5050 5051 this = self.expression(exp.Is, this=this, expression=expression) 5052 return self.expression(exp.Not, this=this) if negate else this 5053 5054 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5055 unnest = self._parse_unnest(with_alias=False) 5056 if unnest: 5057 this = self.expression(exp.In, this=this, unnest=unnest) 5058 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5059 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5060 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5061 5062 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5063 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5064 else: 5065 this = self.expression(exp.In, this=this, expressions=expressions) 5066 5067 if matched_l_paren: 5068 self._match_r_paren(this) 5069 elif not self._match(TokenType.R_BRACKET, expression=this): 5070 self.raise_error("Expecting ]") 5071 else: 5072 this = self.expression(exp.In, this=this, field=self._parse_column()) 5073 5074 return this 5075 5076 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5077 symmetric = None 5078 if self._match_text_seq("SYMMETRIC"): 5079 symmetric = True 5080 elif self._match_text_seq("ASYMMETRIC"): 5081 symmetric = False 5082 5083 low = self._parse_bitwise() 5084 self._match(TokenType.AND) 5085 high = self._parse_bitwise() 5086 5087 return self.expression( 5088 exp.Between, 5089 this=this, 5090 low=low, 5091 high=high, 5092 symmetric=symmetric, 5093 ) 5094 5095 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5096 if not self._match(TokenType.ESCAPE): 5097 return this 5098 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5099 5100 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5101 index = self._index 5102 5103 if not 
self._match(TokenType.INTERVAL) and match_interval: 5104 return None 5105 5106 if self._match(TokenType.STRING, advance=False): 5107 this = self._parse_primary() 5108 else: 5109 this = self._parse_term() 5110 5111 if not this or ( 5112 isinstance(this, exp.Column) 5113 and not this.table 5114 and not this.this.quoted 5115 and self._curr 5116 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5117 ): 5118 self._retreat(index) 5119 return None 5120 5121 # handle day-time format interval span with omitted units: 5122 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5123 interval_span_units_omitted = None 5124 if ( 5125 this 5126 and this.is_string 5127 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5128 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5129 ): 5130 index = self._index 5131 5132 # Var "TO" Var 5133 first_unit = self._parse_var(any_token=True, upper=True) 5134 second_unit = None 5135 if first_unit and self._match_text_seq("TO"): 5136 second_unit = self._parse_var(any_token=True, upper=True) 5137 5138 interval_span_units_omitted = not (first_unit and second_unit) 5139 5140 self._retreat(index) 5141 5142 unit = ( 5143 None 5144 if interval_span_units_omitted 5145 else ( 5146 self._parse_function() 5147 or ( 5148 not self._match(TokenType.ALIAS, advance=False) 5149 and self._parse_var(any_token=True, upper=True) 5150 ) 5151 ) 5152 ) 5153 5154 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5155 # each INTERVAL expression into this canonical form so it's easy to transpile 5156 if this and this.is_number: 5157 this = exp.Literal.string(this.to_py()) 5158 elif this and this.is_string: 5159 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5160 if parts and unit: 5161 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5162 unit = None 5163 self._retreat(self._index - 1) 5164 5165 if len(parts) == 1: 5166 this = exp.Literal.string(parts[0][0]) 5167 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5168 5169 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5170 unit = self.expression( 5171 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5172 ) 5173 5174 interval = self.expression(exp.Interval, this=this, unit=unit) 5175 5176 index = self._index 5177 self._match(TokenType.PLUS) 5178 5179 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5180 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5181 return self.expression( 5182 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5183 ) 5184 5185 self._retreat(index) 5186 return interval 5187 5188 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5189 this = self._parse_term() 5190 5191 while True: 5192 if self._match_set(self.BITWISE): 5193 this = self.expression( 5194 self.BITWISE[self._prev.token_type], 5195 this=this, 5196 expression=self._parse_term(), 5197 ) 5198 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5199 this = self.expression( 5200 exp.DPipe, 5201 this=this, 5202 expression=self._parse_term(), 5203 safe=not self.dialect.STRICT_STRING_CONCAT, 5204 ) 5205 elif self._match(TokenType.DQMARK): 5206 this = self.expression( 5207 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5208 ) 5209 elif self._match_pair(TokenType.LT, TokenType.LT): 5210 this = self.expression( 5211 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5212 ) 5213 elif self._match_pair(TokenType.GT, TokenType.GT): 5214 this = self.expression( 5215 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5216 ) 5217 else: 5218 break 5219 5220 return this 5221 5222 def _parse_term(self) -> t.Optional[exp.Expression]: 5223 this = self._parse_factor() 5224 5225 while self._match_set(self.TERM): 5226 klass = self.TERM[self._prev.token_type] 5227 comments = self._prev_comments 5228 expression = self._parse_factor() 5229 5230 this = self.expression(klass, this=this, comments=comments, expression=expression) 5231 5232 if isinstance(this, exp.Collate): 5233 expr = this.expression 5234 5235 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5236 # fallback to Identifier / Var 5237 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5238 ident = expr.this 5239 if isinstance(ident, exp.Identifier): 5240 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5241 5242 return this 5243 5244 def _parse_factor(self) -> t.Optional[exp.Expression]: 5245 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5246 this = parse_method() 5247 5248 while self._match_set(self.FACTOR): 5249 klass = self.FACTOR[self._prev.token_type] 5250 comments = self._prev_comments 5251 expression = parse_method() 5252 5253 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5254 self._retreat(self._index - 1) 5255 return this 5256 5257 this = self.expression(klass, this=this, comments=comments, expression=expression) 5258 5259 if isinstance(this, exp.Div): 5260 this.args["typed"] = self.dialect.TYPED_DIVISION 5261 this.args["safe"] = self.dialect.SAFE_DIVISION 5262 5263 return this 5264 5265 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5266 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5267 5268 def _parse_unary(self) -> t.Optional[exp.Expression]: 5269 if self._match_set(self.UNARY_PARSERS): 5270 return self.UNARY_PARSERS[self._prev.token_type](self) 5271 return self._parse_at_time_zone(self._parse_type()) 5272 5273 def _parse_type( 5274 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5275 ) -> t.Optional[exp.Expression]: 5276 interval = parse_interval and self._parse_interval() 5277 if interval: 5278 return interval 5279 5280 index = self._index 5281 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5282 
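# A rough illustration of the interval canonicalization above, using the public
# sqlglot.parse_one API (a sketch; exact node reprs can differ between sqlglot versions):
#
#     import sqlglot
#     from sqlglot import exp
#
#     ivl = sqlglot.parse_one("SELECT INTERVAL '5 days'").find(exp.Interval)
#     ivl.this              # string literal '5'  (value split out of the quoted string)
#     ivl.args.get("unit")  # Var 'DAYS'          (unit pulled into the canonical form)
#
#     # Adjacent interval literals are folded into a sum of intervals (exp.Add):
#     sqlglot.parse_one("SELECT INTERVAL '1' DAY '2' HOUR").find(exp.Add)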
5283 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5284 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5285 if isinstance(data_type, exp.Cast): 5286 # This constructor can contain ops directly after it, for instance struct unnesting: 5287 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5288 return self._parse_column_ops(data_type) 5289 5290 if data_type: 5291 index2 = self._index 5292 this = self._parse_primary() 5293 5294 if isinstance(this, exp.Literal): 5295 literal = this.name 5296 this = self._parse_column_ops(this) 5297 5298 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5299 if parser: 5300 return parser(self, this, data_type) 5301 5302 if ( 5303 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5304 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5305 and TIME_ZONE_RE.search(literal) 5306 ): 5307 data_type = exp.DataType.build("TIMESTAMPTZ") 5308 5309 return self.expression(exp.Cast, this=this, to=data_type) 5310 5311 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5312 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5313 # 5314 # If the index difference here is greater than 1, that means the parser itself must have 5315 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5316 # 5317 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5318 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5319 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5320 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5321 # 5322 # In these cases, we don't really want to return the converted type, but instead retreat 5323 # and try to parse a Column or Identifier in the section below. 
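# For instance, a quick sanity check of the converter behaviour described above (a sketch
# against the public API; it assumes Snowflake's TYPE_CONVERTERS pads a bare DECIMAL out to
# DECIMAL(38, 0), so verify against the dialect and sqlglot version you are on):
#
#     import sqlglot
#     sqlglot.parse_one("SELECT CAST(x AS DECIMAL)", read="snowflake").sql()
#     # typically renders as 'SELECT CAST(x AS DECIMAL(38, 0))'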
5324 if data_type.expressions and index2 - index > 1: 5325 self._retreat(index2) 5326 return self._parse_column_ops(data_type) 5327 5328 self._retreat(index) 5329 5330 if fallback_to_identifier: 5331 return self._parse_id_var() 5332 5333 this = self._parse_column() 5334 return this and self._parse_column_ops(this) 5335 5336 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5337 this = self._parse_type() 5338 if not this: 5339 return None 5340 5341 if isinstance(this, exp.Column) and not this.table: 5342 this = exp.var(this.name.upper()) 5343 5344 return self.expression( 5345 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5346 ) 5347 5348 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5349 type_name = identifier.name 5350 5351 while self._match(TokenType.DOT): 5352 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5353 5354 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5355 5356 def _parse_types( 5357 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5358 ) -> t.Optional[exp.Expression]: 5359 index = self._index 5360 5361 this: t.Optional[exp.Expression] = None 5362 prefix = self._match_text_seq("SYSUDTLIB", ".") 5363 5364 if self._match_set(self.TYPE_TOKENS): 5365 type_token = self._prev.token_type 5366 else: 5367 type_token = None 5368 identifier = allow_identifiers and self._parse_id_var( 5369 any_token=False, tokens=(TokenType.VAR,) 5370 ) 5371 if isinstance(identifier, exp.Identifier): 5372 try: 5373 tokens = self.dialect.tokenize(identifier.name) 5374 except TokenError: 5375 tokens = None 5376 5377 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5378 type_token = tokens[0].token_type 5379 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5380 this = self._parse_user_defined_type(identifier) 5381 else: 5382 self._retreat(self._index - 1) 5383 return None 5384 else: 5385 return None 5386 5387 if type_token == TokenType.PSEUDO_TYPE: 5388 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5389 5390 if type_token == TokenType.OBJECT_IDENTIFIER: 5391 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5392 5393 # https://materialize.com/docs/sql/types/map/ 5394 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5395 key_type = self._parse_types( 5396 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5397 ) 5398 if not self._match(TokenType.FARROW): 5399 self._retreat(index) 5400 return None 5401 5402 value_type = self._parse_types( 5403 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5404 ) 5405 if not self._match(TokenType.R_BRACKET): 5406 self._retreat(index) 5407 return None 5408 5409 return exp.DataType( 5410 this=exp.DataType.Type.MAP, 5411 expressions=[key_type, value_type], 5412 nested=True, 5413 prefix=prefix, 5414 ) 5415 5416 nested = type_token in self.NESTED_TYPE_TOKENS 5417 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5418 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5419 expressions = None 5420 maybe_func = False 5421 5422 if self._match(TokenType.L_PAREN): 5423 if is_struct: 5424 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5425 elif nested: 5426 expressions = self._parse_csv( 5427 lambda: self._parse_types( 5428 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5429 ) 5430 ) 5431 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5432 this = expressions[0] 5433 this.set("nullable", True) 5434 self._match_r_paren() 5435 return this 5436 elif type_token in self.ENUM_TYPE_TOKENS: 5437 expressions = self._parse_csv(self._parse_equality) 5438 elif is_aggregate: 5439 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5440 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5441 ) 5442 if not func_or_ident: 5443 return None 5444 expressions = [func_or_ident] 5445 if self._match(TokenType.COMMA): 5446 expressions.extend( 5447 self._parse_csv( 5448 lambda: self._parse_types( 5449 check_func=check_func, 5450 schema=schema, 5451 allow_identifiers=allow_identifiers, 5452 ) 5453 ) 5454 ) 5455 else: 5456 expressions = self._parse_csv(self._parse_type_size) 5457 5458 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5459 if type_token == TokenType.VECTOR and len(expressions) == 2: 5460 expressions = self._parse_vector_expressions(expressions) 5461 5462 if not self._match(TokenType.R_PAREN): 5463 self._retreat(index) 5464 return None 5465 5466 maybe_func = True 5467 5468 values: t.Optional[t.List[exp.Expression]] = None 5469 5470 if nested and self._match(TokenType.LT): 5471 if is_struct: 5472 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5473 else: 5474 expressions = self._parse_csv( 5475 lambda: self._parse_types( 5476 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5477 ) 5478 ) 5479 5480 if not self._match(TokenType.GT): 5481 self.raise_error("Expecting >") 5482 5483 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5484 values = self._parse_csv(self._parse_assignment) 5485 if not values and is_struct: 5486 values = None 5487 self._retreat(self._index - 1) 5488 else: 5489 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5490 5491 if type_token in self.TIMESTAMPS: 5492 if self._match_text_seq("WITH", "TIME", "ZONE"): 5493 maybe_func = False 5494 tz_type = ( 5495 exp.DataType.Type.TIMETZ 5496 if type_token in self.TIMES 5497 else exp.DataType.Type.TIMESTAMPTZ 5498 ) 5499 this = exp.DataType(this=tz_type, expressions=expressions) 5500 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5501 maybe_func = False 5502 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5503 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5504 maybe_func = False 5505 elif type_token == TokenType.INTERVAL: 5506 unit = self._parse_var(upper=True) 5507 if unit: 5508 if self._match_text_seq("TO"): 5509 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5510 5511 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5512 else: 5513 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5514 elif type_token == TokenType.VOID: 5515 this = exp.DataType(this=exp.DataType.Type.NULL) 5516 5517 if maybe_func and check_func: 5518 index2 = self._index 5519 peek = self._parse_string() 5520 5521 if not peek: 5522 self._retreat(index) 5523 return None 5524 5525 self._retreat(index2) 5526 5527 if not this: 5528 if self._match_text_seq("UNSIGNED"): 5529 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5530 if not unsigned_type_token: 5531 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5532 5533 type_token = unsigned_type_token or type_token 5534 5535 # NULLABLE without parentheses can be a column (Presto/Trino) 5536 if type_token == 
TokenType.NULLABLE and not expressions: 5537 self._retreat(index) 5538 return None 5539 5540 this = exp.DataType( 5541 this=exp.DataType.Type[type_token.value], 5542 expressions=expressions, 5543 nested=nested, 5544 prefix=prefix, 5545 ) 5546 5547 # Empty arrays/structs are allowed 5548 if values is not None: 5549 cls = exp.Struct if is_struct else exp.Array 5550 this = exp.cast(cls(expressions=values), this, copy=False) 5551 5552 elif expressions: 5553 this.set("expressions", expressions) 5554 5555 # https://materialize.com/docs/sql/types/list/#type-name 5556 while self._match(TokenType.LIST): 5557 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5558 5559 index = self._index 5560 5561 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5562 matched_array = self._match(TokenType.ARRAY) 5563 5564 while self._curr: 5565 datatype_token = self._prev.token_type 5566 matched_l_bracket = self._match(TokenType.L_BRACKET) 5567 5568 if (not matched_l_bracket and not matched_array) or ( 5569 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5570 ): 5571 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5572 # not to be confused with the fixed size array parsing 5573 break 5574 5575 matched_array = False 5576 values = self._parse_csv(self._parse_assignment) or None 5577 if ( 5578 values 5579 and not schema 5580 and ( 5581 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5582 ) 5583 ): 5584 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5585 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5586 self._retreat(index) 5587 break 5588 5589 this = exp.DataType( 5590 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5591 ) 5592 self._match(TokenType.R_BRACKET) 5593 5594 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5595 converter = self.TYPE_CONVERTERS.get(this.this) 5596 if converter: 5597 this = converter(t.cast(exp.DataType, this)) 5598 5599 return this 5600 5601 def _parse_vector_expressions( 5602 self, expressions: t.List[exp.Expression] 5603 ) -> t.List[exp.Expression]: 5604 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5605 5606 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5607 index = self._index 5608 5609 if ( 5610 self._curr 5611 and self._next 5612 and self._curr.token_type in self.TYPE_TOKENS 5613 and self._next.token_type in self.TYPE_TOKENS 5614 ): 5615 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5616 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5617 this = self._parse_id_var() 5618 else: 5619 this = ( 5620 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5621 or self._parse_id_var() 5622 ) 5623 5624 self._match(TokenType.COLON) 5625 5626 if ( 5627 type_required 5628 and not isinstance(this, exp.DataType) 5629 and not self._match_set(self.TYPE_TOKENS, advance=False) 5630 ): 5631 self._retreat(index) 5632 return self._parse_types() 5633 5634 return self._parse_column_def(this) 5635 5636 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5637 if not self._match_text_seq("AT", "TIME", "ZONE"): 5638 return this 5639 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5640 5641 def _parse_column(self) -> t.Optional[exp.Expression]: 5642 this = self._parse_column_reference() 5643 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5644 5645 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5646 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5647 5648 return column 5649 5650 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5651 this = self._parse_field() 5652 if ( 5653 not this 5654 and self._match(TokenType.VALUES, advance=False) 5655 and self.VALUES_FOLLOWED_BY_PAREN 5656 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5657 ): 5658 this = self._parse_id_var() 5659 5660 if isinstance(this, exp.Identifier): 5661 # We bubble up comments from the Identifier to the Column 5662 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5663 5664 return this 5665 5666 def _parse_colon_as_variant_extract( 5667 self, this: t.Optional[exp.Expression] 5668 ) -> t.Optional[exp.Expression]: 5669 casts = [] 5670 json_path = [] 5671 escape = None 5672 5673 while self._match(TokenType.COLON): 5674 start_index = self._index 5675 5676 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5677 path = self._parse_column_ops( 5678 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5679 ) 5680 5681 # The cast :: operator has a lower precedence than the extraction operator :, so 5682 # we rearrange the AST appropriately to avoid casting the JSON path 5683 while isinstance(path, exp.Cast): 5684 casts.append(path.to) 5685 path = path.this 5686 5687 if casts: 5688 dcolon_offset = next( 5689 i 5690 for i, t in enumerate(self._tokens[start_index:]) 5691 if t.token_type == TokenType.DCOLON 5692 ) 5693 end_token = self._tokens[start_index + dcolon_offset - 1] 5694 else: 5695 end_token = self._prev 5696 5697 if path: 5698 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5699 # it'll roundtrip to a string literal in GET_PATH 5700 if isinstance(path, exp.Identifier) and path.quoted: 5701 escape = True 5702 5703 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5704 5705 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5706 # Databricks transforms it back to the colon/dot notation 5707 if json_path: 5708 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5709 5710 if json_path_expr: 5711 json_path_expr.set("escape", escape) 5712 5713 this = self.expression( 5714 exp.JSONExtract, 5715 this=this, 5716 expression=json_path_expr, 5717 variant_extract=True, 5718 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5719 ) 5720 5721 while casts: 5722 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5723 5724 return this 5725 5726 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5727 return self._parse_types() 5728 5729 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5730 this = self._parse_bracket(this) 5731 5732 while self._match_set(self.COLUMN_OPERATORS): 5733 op_token = self._prev.token_type 5734 op = self.COLUMN_OPERATORS.get(op_token) 5735 5736 if op_token in self.CAST_COLUMN_OPERATORS: 5737 field = self._parse_dcolon() 5738 if not field: 5739 self.raise_error("Expected type") 5740 elif op and self._curr: 5741 field = self._parse_column_reference() or self._parse_bitwise() 5742 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5743 field = self._parse_column_ops(field) 5744 else: 5745 field = self._parse_field(any_token=True, anonymous_func=True) 5746 5747 # Function calls can be qualified, e.g., x.y.FOO() 5748 # This converts the final AST to a series of Dots leading to the function call 5749 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5750 if isinstance(field, (exp.Func, exp.Window)) and this: 5751 this = this.transform( 5752 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5753 ) 5754 5755 if op: 5756 this = op(self, this, field) 5757 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5758 this = self.expression( 5759 exp.Column, 5760 comments=this.comments, 5761 this=field, 5762 table=this.this, 5763 db=this.args.get("table"), 5764 catalog=this.args.get("db"), 5765 ) 5766 elif isinstance(field, exp.Window): 5767 # Move the exp.Dot's to the window's function 5768 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5769 field.set("this", window_func) 5770 this = field 5771 else: 5772 this = self.expression(exp.Dot, this=this, expression=field) 5773 5774 if field and field.comments: 5775 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5776 5777 this = self._parse_bracket(this) 5778 5779 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5780 5781 def _parse_paren(self) -> t.Optional[exp.Expression]: 5782 if not self._match(TokenType.L_PAREN): 5783 return None 5784 5785 comments = self._prev_comments 5786 query = self._parse_select() 5787 5788 if query: 5789 expressions = [query] 5790 else: 5791 expressions = self._parse_expressions() 5792 5793 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5794 5795 if not this and self._match(TokenType.R_PAREN, advance=False): 5796 this = self.expression(exp.Tuple) 5797 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5798 this = self._parse_subquery(this=this, parse_alias=False) 5799 elif isinstance(this, exp.Subquery): 5800 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5801 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5802 this = self.expression(exp.Tuple, expressions=expressions) 5803 else: 5804 this = self.expression(exp.Paren, this=this) 5805 5806 if this: 5807 this.add_comments(comments) 5808 5809 self._match_r_paren(expression=this) 5810 5811 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5812 return self._parse_window(this) 5813 5814 return this 5815 5816 def _parse_primary(self) -> t.Optional[exp.Expression]: 5817 if self._match_set(self.PRIMARY_PARSERS): 5818 token_type = self._prev.token_type 5819 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5820 5821 if token_type == TokenType.STRING: 5822 expressions = [primary] 5823 while self._match(TokenType.STRING): 5824 expressions.append(exp.Literal.string(self._prev.text)) 5825 5826 if len(expressions) > 1: 5827 return self.expression(exp.Concat, expressions=expressions) 5828 5829 return primary 5830 5831 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5832 return exp.Literal.number(f"0.{self._prev.text}") 5833 5834 return self._parse_paren() 5835 5836 def _parse_field( 5837 self, 5838 any_token: bool = False, 5839 tokens: t.Optional[t.Collection[TokenType]] = None, 5840 anonymous_func: bool = False, 5841 ) -> t.Optional[exp.Expression]: 5842 if anonymous_func: 5843 field = ( 5844 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5845 or self._parse_primary() 5846 ) 5847 else: 5848 field = self._parse_primary() or self._parse_function( 5849 anonymous=anonymous_func, any_token=any_token 5850 ) 5851 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5852 5853 def _parse_function( 5854 self, 5855 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5856 anonymous: bool = False, 5857 optional_parens: bool = True, 5858 any_token: bool = False, 5859 ) -> t.Optional[exp.Expression]: 5860 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5861 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5862 fn_syntax = False 5863 if ( 5864 self._match(TokenType.L_BRACE, advance=False) 5865 and self._next 5866 and self._next.text.upper() == "FN" 5867 ): 5868 self._advance(2) 5869 fn_syntax = True 5870 5871 func = self._parse_function_call( 5872 functions=functions, 5873 anonymous=anonymous, 5874 optional_parens=optional_parens, 5875 any_token=any_token, 5876 ) 5877 5878 if fn_syntax: 5879 self._match(TokenType.R_BRACE) 5880 5881 return func 5882 5883 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5884 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5885 5886 def _parse_function_call( 5887 self, 5888 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5889 anonymous: bool = False, 5890 optional_parens: bool = True, 5891 any_token: bool = False, 5892 ) -> t.Optional[exp.Expression]: 5893 if not self._curr: 5894 return None 5895 5896 comments = self._curr.comments 5897 prev = self._prev 5898 token = self._curr 5899 token_type = self._curr.token_type 5900 this = self._curr.text 5901 upper = this.upper() 5902 5903 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5904 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5905 self._advance() 5906 return 
self._parse_window(parser(self)) 5907 5908 if not self._next or self._next.token_type != TokenType.L_PAREN: 5909 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5910 self._advance() 5911 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5912 5913 return None 5914 5915 if any_token: 5916 if token_type in self.RESERVED_TOKENS: 5917 return None 5918 elif token_type not in self.FUNC_TOKENS: 5919 return None 5920 5921 self._advance(2) 5922 5923 parser = self.FUNCTION_PARSERS.get(upper) 5924 if parser and not anonymous: 5925 this = parser(self) 5926 else: 5927 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5928 5929 if subquery_predicate: 5930 expr = None 5931 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5932 expr = self._parse_select() 5933 self._match_r_paren() 5934 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5935 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5936 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5937 self._advance(-1) 5938 expr = self._parse_bitwise() 5939 5940 if expr: 5941 return self.expression(subquery_predicate, comments=comments, this=expr) 5942 5943 if functions is None: 5944 functions = self.FUNCTIONS 5945 5946 function = functions.get(upper) 5947 known_function = function and not anonymous 5948 5949 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5950 args = self._parse_function_args(alias) 5951 5952 post_func_comments = self._curr and self._curr.comments 5953 if known_function and post_func_comments: 5954 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5955 # call we'll construct it as exp.Anonymous, even if it's "known" 5956 if any( 5957 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5958 for comment in post_func_comments 5959 ): 5960 known_function = False 5961 5962 if alias and known_function: 5963 args = self._kv_to_prop_eq(args) 5964 5965 if known_function: 5966 func_builder = t.cast(t.Callable, function) 5967 5968 if "dialect" in func_builder.__code__.co_varnames: 5969 func = func_builder(args, dialect=self.dialect) 5970 else: 5971 func = func_builder(args) 5972 5973 func = self.validate_expression(func, args) 5974 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5975 func.meta["name"] = this 5976 5977 this = func 5978 else: 5979 if token_type == TokenType.IDENTIFIER: 5980 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5981 5982 this = self.expression(exp.Anonymous, this=this, expressions=args) 5983 this = this.update_positions(token) 5984 5985 if isinstance(this, exp.Expression): 5986 this.add_comments(comments) 5987 5988 self._match_r_paren(this) 5989 return self._parse_window(this) 5990 5991 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5992 return expression 5993 5994 def _kv_to_prop_eq( 5995 self, expressions: t.List[exp.Expression], parse_map: bool = False 5996 ) -> t.List[exp.Expression]: 5997 transformed = [] 5998 5999 for index, e in enumerate(expressions): 6000 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6001 if isinstance(e, exp.Alias): 6002 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6003 6004 if not isinstance(e, exp.PropertyEQ): 6005 e = self.expression( 6006 exp.PropertyEQ, 6007 this=e.this if parse_map else exp.to_identifier(e.this.name), 6008 expression=e.expression, 6009 ) 6010 6011 if isinstance(e.this, exp.Column): 6012 e.this.replace(e.this.this) 
6013 else: 6014 e = self._to_prop_eq(e, index) 6015 6016 transformed.append(e) 6017 6018 return transformed 6019 6020 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6021 return self._parse_statement() 6022 6023 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6024 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6025 6026 def _parse_user_defined_function( 6027 self, kind: t.Optional[TokenType] = None 6028 ) -> t.Optional[exp.Expression]: 6029 this = self._parse_table_parts(schema=True) 6030 6031 if not self._match(TokenType.L_PAREN): 6032 return this 6033 6034 expressions = self._parse_csv(self._parse_function_parameter) 6035 self._match_r_paren() 6036 return self.expression( 6037 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6038 ) 6039 6040 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6041 literal = self._parse_primary() 6042 if literal: 6043 return self.expression(exp.Introducer, this=token.text, expression=literal) 6044 6045 return self._identifier_expression(token) 6046 6047 def _parse_session_parameter(self) -> exp.SessionParameter: 6048 kind = None 6049 this = self._parse_id_var() or self._parse_primary() 6050 6051 if this and self._match(TokenType.DOT): 6052 kind = this.name 6053 this = self._parse_var() or self._parse_primary() 6054 6055 return self.expression(exp.SessionParameter, this=this, kind=kind) 6056 6057 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6058 return self._parse_id_var() 6059 6060 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6061 index = self._index 6062 6063 if self._match(TokenType.L_PAREN): 6064 expressions = t.cast( 6065 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6066 ) 6067 6068 if not self._match(TokenType.R_PAREN): 6069 self._retreat(index) 6070 else: 6071 expressions = [self._parse_lambda_arg()] 6072 6073 if self._match_set(self.LAMBDAS): 6074 return self.LAMBDAS[self._prev.token_type](self, expressions) 6075 6076 self._retreat(index) 6077 6078 this: t.Optional[exp.Expression] 6079 6080 if self._match(TokenType.DISTINCT): 6081 this = self.expression( 6082 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6083 ) 6084 else: 6085 this = self._parse_select_or_expression(alias=alias) 6086 6087 return self._parse_limit( 6088 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6089 ) 6090 6091 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6092 index = self._index 6093 if not self._match(TokenType.L_PAREN): 6094 return this 6095 6096 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6097 # expr can be of both types 6098 if self._match_set(self.SELECT_START_TOKENS): 6099 self._retreat(index) 6100 return this 6101 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6102 self._match_r_paren() 6103 return self.expression(exp.Schema, this=this, expressions=args) 6104 6105 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6106 return self._parse_column_def(self._parse_field(any_token=True)) 6107 6108 def _parse_column_def( 6109 self, this: t.Optional[exp.Expression], computed_column: bool = True 6110 ) -> t.Optional[exp.Expression]: 6111 # column defs are not really columns, they're identifiers 6112 if isinstance(this, exp.Column): 6113 this = this.this 6114 6115 if not computed_column: 6116 self._match(TokenType.ALIAS) 6117 6118 kind = self._parse_types(schema=True) 6119 6120 if self._match_text_seq("FOR", "ORDINALITY"): 6121 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6122 6123 constraints: t.List[exp.Expression] = [] 6124 6125 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6126 ("ALIAS", "MATERIALIZED") 6127 ): 6128 persisted = self._prev.text.upper() == "MATERIALIZED" 6129 constraint_kind = exp.ComputedColumnConstraint( 6130 this=self._parse_assignment(), 6131 persisted=persisted or self._match_text_seq("PERSISTED"), 6132 data_type=exp.Var(this="AUTO") 6133 if self._match_text_seq("AUTO") 6134 else self._parse_types(), 6135 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6136 ) 6137 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6138 elif ( 6139 kind 6140 and self._match(TokenType.ALIAS, advance=False) 6141 and ( 6142 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6143 or (self._next and self._next.token_type == TokenType.L_PAREN) 6144 ) 6145 ): 6146 self._advance() 6147 constraints.append( 6148 self.expression( 6149 exp.ColumnConstraint, 6150 kind=exp.ComputedColumnConstraint( 6151 this=self._parse_disjunction(), 6152 persisted=self._match_texts(("STORED", "VIRTUAL")) 6153 and self._prev.text.upper() == "STORED", 6154 ), 6155 ) 6156 ) 6157 6158 while True: 6159 constraint = self._parse_column_constraint() 6160 if not constraint: 6161 break 6162 constraints.append(constraint) 6163 6164 if not kind and not constraints: 6165 return this 6166 6167 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6168 6169 def _parse_auto_increment( 6170 self, 6171 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6172 start = None 6173 increment = None 6174 order = None 6175 6176 if self._match(TokenType.L_PAREN, advance=False): 6177 args = self._parse_wrapped_csv(self._parse_bitwise) 6178 start = seq_get(args, 0) 6179 increment = seq_get(args, 1) 6180 elif self._match_text_seq("START"): 6181 start = self._parse_bitwise() 6182 self._match_text_seq("INCREMENT") 6183 increment = self._parse_bitwise() 6184 if self._match_text_seq("ORDER"): 6185 order = True 6186 elif self._match_text_seq("NOORDER"): 6187 order = False 6188 6189 if start and increment: 6190 return exp.GeneratedAsIdentityColumnConstraint( 6191 start=start, increment=increment, this=False, order=order 6192 ) 6193 6194 return exp.AutoIncrementColumnConstraint() 6195 6196 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6197 if not self._match_text_seq("REFRESH"): 6198 self._retreat(self._index - 1) 6199 return None 6200 return self.expression(exp.AutoRefreshProperty, 
this=self._parse_var(upper=True)) 6201 6202 def _parse_compress(self) -> exp.CompressColumnConstraint: 6203 if self._match(TokenType.L_PAREN, advance=False): 6204 return self.expression( 6205 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6206 ) 6207 6208 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6209 6210 def _parse_generated_as_identity( 6211 self, 6212 ) -> ( 6213 exp.GeneratedAsIdentityColumnConstraint 6214 | exp.ComputedColumnConstraint 6215 | exp.GeneratedAsRowColumnConstraint 6216 ): 6217 if self._match_text_seq("BY", "DEFAULT"): 6218 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6219 this = self.expression( 6220 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6221 ) 6222 else: 6223 self._match_text_seq("ALWAYS") 6224 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6225 6226 self._match(TokenType.ALIAS) 6227 6228 if self._match_text_seq("ROW"): 6229 start = self._match_text_seq("START") 6230 if not start: 6231 self._match(TokenType.END) 6232 hidden = self._match_text_seq("HIDDEN") 6233 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6234 6235 identity = self._match_text_seq("IDENTITY") 6236 6237 if self._match(TokenType.L_PAREN): 6238 if self._match(TokenType.START_WITH): 6239 this.set("start", self._parse_bitwise()) 6240 if self._match_text_seq("INCREMENT", "BY"): 6241 this.set("increment", self._parse_bitwise()) 6242 if self._match_text_seq("MINVALUE"): 6243 this.set("minvalue", self._parse_bitwise()) 6244 if self._match_text_seq("MAXVALUE"): 6245 this.set("maxvalue", self._parse_bitwise()) 6246 6247 if self._match_text_seq("CYCLE"): 6248 this.set("cycle", True) 6249 elif self._match_text_seq("NO", "CYCLE"): 6250 this.set("cycle", False) 6251 6252 if not identity: 6253 this.set("expression", self._parse_range()) 6254 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6255 args = self._parse_csv(self._parse_bitwise) 6256 this.set("start", seq_get(args, 0)) 6257 this.set("increment", seq_get(args, 1)) 6258 6259 self._match_r_paren() 6260 6261 return this 6262 6263 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6264 self._match_text_seq("LENGTH") 6265 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6266 6267 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6268 if self._match_text_seq("NULL"): 6269 return self.expression(exp.NotNullColumnConstraint) 6270 if self._match_text_seq("CASESPECIFIC"): 6271 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6272 if self._match_text_seq("FOR", "REPLICATION"): 6273 return self.expression(exp.NotForReplicationColumnConstraint) 6274 6275 # Unconsume the `NOT` token 6276 self._retreat(self._index - 1) 6277 return None 6278 6279 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6280 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6281 6282 procedure_option_follows = ( 6283 self._match(TokenType.WITH, advance=False) 6284 and self._next 6285 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6286 ) 6287 6288 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6289 return self.expression( 6290 exp.ColumnConstraint, 6291 this=this, 6292 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6293 ) 6294 6295 return this 6296 6297 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6298 if not 
self._match(TokenType.CONSTRAINT): 6299 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6300 6301 return self.expression( 6302 exp.Constraint, 6303 this=self._parse_id_var(), 6304 expressions=self._parse_unnamed_constraints(), 6305 ) 6306 6307 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6308 constraints = [] 6309 while True: 6310 constraint = self._parse_unnamed_constraint() or self._parse_function() 6311 if not constraint: 6312 break 6313 constraints.append(constraint) 6314 6315 return constraints 6316 6317 def _parse_unnamed_constraint( 6318 self, constraints: t.Optional[t.Collection[str]] = None 6319 ) -> t.Optional[exp.Expression]: 6320 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6321 constraints or self.CONSTRAINT_PARSERS 6322 ): 6323 return None 6324 6325 constraint = self._prev.text.upper() 6326 if constraint not in self.CONSTRAINT_PARSERS: 6327 self.raise_error(f"No parser found for schema constraint {constraint}.") 6328 6329 return self.CONSTRAINT_PARSERS[constraint](self) 6330 6331 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6332 return self._parse_id_var(any_token=False) 6333 6334 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6335 self._match_texts(("KEY", "INDEX")) 6336 return self.expression( 6337 exp.UniqueColumnConstraint, 6338 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6339 this=self._parse_schema(self._parse_unique_key()), 6340 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6341 on_conflict=self._parse_on_conflict(), 6342 options=self._parse_key_constraint_options(), 6343 ) 6344 6345 def _parse_key_constraint_options(self) -> t.List[str]: 6346 options = [] 6347 while True: 6348 if not self._curr: 6349 break 6350 6351 if self._match(TokenType.ON): 6352 action = None 6353 on = self._advance_any() and self._prev.text 6354 6355 if self._match_text_seq("NO", "ACTION"): 6356 action = "NO ACTION" 6357 elif self._match_text_seq("CASCADE"): 6358 action = "CASCADE" 6359 elif self._match_text_seq("RESTRICT"): 6360 action = "RESTRICT" 6361 elif self._match_pair(TokenType.SET, TokenType.NULL): 6362 action = "SET NULL" 6363 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6364 action = "SET DEFAULT" 6365 else: 6366 self.raise_error("Invalid key constraint") 6367 6368 options.append(f"ON {on} {action}") 6369 else: 6370 var = self._parse_var_from_options( 6371 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6372 ) 6373 if not var: 6374 break 6375 options.append(var.name) 6376 6377 return options 6378 6379 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6380 if match and not self._match(TokenType.REFERENCES): 6381 return None 6382 6383 expressions = None 6384 this = self._parse_table(schema=True) 6385 options = self._parse_key_constraint_options() 6386 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6387 6388 def _parse_foreign_key(self) -> exp.ForeignKey: 6389 expressions = ( 6390 self._parse_wrapped_id_vars() 6391 if not self._match(TokenType.REFERENCES, advance=False) 6392 else None 6393 ) 6394 reference = self._parse_references() 6395 on_options = {} 6396 6397 while self._match(TokenType.ON): 6398 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6399 self.raise_error("Expected DELETE or UPDATE") 6400 6401 kind = self._prev.text.lower() 6402 6403 if self._match_text_seq("NO", "ACTION"): 6404 action = "NO ACTION" 6405 elif 
self._match(TokenType.SET): 6406 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6407 action = "SET " + self._prev.text.upper() 6408 else: 6409 self._advance() 6410 action = self._prev.text.upper() 6411 6412 on_options[kind] = action 6413 6414 return self.expression( 6415 exp.ForeignKey, 6416 expressions=expressions, 6417 reference=reference, 6418 options=self._parse_key_constraint_options(), 6419 **on_options, # type: ignore 6420 ) 6421 6422 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6423 return self._parse_ordered() or self._parse_field() 6424 6425 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6426 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6427 self._retreat(self._index - 1) 6428 return None 6429 6430 id_vars = self._parse_wrapped_id_vars() 6431 return self.expression( 6432 exp.PeriodForSystemTimeConstraint, 6433 this=seq_get(id_vars, 0), 6434 expression=seq_get(id_vars, 1), 6435 ) 6436 6437 def _parse_primary_key( 6438 self, wrapped_optional: bool = False, in_props: bool = False 6439 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6440 desc = ( 6441 self._match_set((TokenType.ASC, TokenType.DESC)) 6442 and self._prev.token_type == TokenType.DESC 6443 ) 6444 6445 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6446 return self.expression( 6447 exp.PrimaryKeyColumnConstraint, 6448 desc=desc, 6449 options=self._parse_key_constraint_options(), 6450 ) 6451 6452 expressions = self._parse_wrapped_csv( 6453 self._parse_primary_key_part, optional=wrapped_optional 6454 ) 6455 6456 return self.expression( 6457 exp.PrimaryKey, 6458 expressions=expressions, 6459 include=self._parse_index_params(), 6460 options=self._parse_key_constraint_options(), 6461 ) 6462 6463 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6464 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6465 6466 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6467 """ 6468 Parses a datetime column in ODBC format. We parse the column into the corresponding 6469 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6470 same as we did for `DATE('yyyy-mm-dd')`. 
6471 6472 Reference: 6473 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6474 """ 6475 self._match(TokenType.VAR) 6476 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6477 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6478 if not self._match(TokenType.R_BRACE): 6479 self.raise_error("Expected }") 6480 return expression 6481 6482 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6483 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6484 return this 6485 6486 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6487 map_token = seq_get(self._tokens, self._index - 2) 6488 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6489 else: 6490 parse_map = False 6491 6492 bracket_kind = self._prev.token_type 6493 if ( 6494 bracket_kind == TokenType.L_BRACE 6495 and self._curr 6496 and self._curr.token_type == TokenType.VAR 6497 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6498 ): 6499 return self._parse_odbc_datetime_literal() 6500 6501 expressions = self._parse_csv( 6502 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6503 ) 6504 6505 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6506 self.raise_error("Expected ]") 6507 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6508 self.raise_error("Expected }") 6509 6510 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6511 if bracket_kind == TokenType.L_BRACE: 6512 this = self.expression( 6513 exp.Struct, 6514 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6515 ) 6516 elif not this: 6517 this = build_array_constructor( 6518 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6519 ) 6520 else: 6521 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6522 if constructor_type: 6523 return build_array_constructor( 6524 constructor_type, 6525 args=expressions, 6526 bracket_kind=bracket_kind, 6527 dialect=self.dialect, 6528 ) 6529 6530 expressions = apply_index_offset( 6531 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6532 ) 6533 this = self.expression( 6534 exp.Bracket, 6535 this=this, 6536 expressions=expressions, 6537 comments=this.pop_comments(), 6538 ) 6539 6540 self._add_comments(this) 6541 return self._parse_bracket(this) 6542 6543 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6544 if self._match(TokenType.COLON): 6545 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6546 return this 6547 6548 def _parse_case(self) -> t.Optional[exp.Expression]: 6549 if self._match(TokenType.DOT, advance=False): 6550 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6551 self._retreat(self._index - 1) 6552 return None 6553 6554 ifs = [] 6555 default = None 6556 6557 comments = self._prev_comments 6558 expression = self._parse_assignment() 6559 6560 while self._match(TokenType.WHEN): 6561 this = self._parse_assignment() 6562 self._match(TokenType.THEN) 6563 then = self._parse_assignment() 6564 ifs.append(self.expression(exp.If, this=this, true=then)) 6565 6566 if self._match(TokenType.ELSE): 6567 default = self._parse_assignment() 6568 6569 if not self._match(TokenType.END): 6570 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6571 default 
= exp.column("interval") 6572 else: 6573 self.raise_error("Expected END after CASE", self._prev) 6574 6575 return self.expression( 6576 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6577 ) 6578 6579 def _parse_if(self) -> t.Optional[exp.Expression]: 6580 if self._match(TokenType.L_PAREN): 6581 args = self._parse_csv( 6582 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6583 ) 6584 this = self.validate_expression(exp.If.from_arg_list(args), args) 6585 self._match_r_paren() 6586 else: 6587 index = self._index - 1 6588 6589 if self.NO_PAREN_IF_COMMANDS and index == 0: 6590 return self._parse_as_command(self._prev) 6591 6592 condition = self._parse_assignment() 6593 6594 if not condition: 6595 self._retreat(index) 6596 return None 6597 6598 self._match(TokenType.THEN) 6599 true = self._parse_assignment() 6600 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6601 self._match(TokenType.END) 6602 this = self.expression(exp.If, this=condition, true=true, false=false) 6603 6604 return this 6605 6606 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6607 if not self._match_text_seq("VALUE", "FOR"): 6608 self._retreat(self._index - 1) 6609 return None 6610 6611 return self.expression( 6612 exp.NextValueFor, 6613 this=self._parse_column(), 6614 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6615 ) 6616 6617 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6618 this = self._parse_function() or self._parse_var_or_string(upper=True) 6619 6620 if self._match(TokenType.FROM): 6621 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6622 6623 if not self._match(TokenType.COMMA): 6624 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6625 6626 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6627 6628 def _parse_gap_fill(self) -> exp.GapFill: 6629 self._match(TokenType.TABLE) 6630 this = self._parse_table() 6631 6632 self._match(TokenType.COMMA) 6633 args = [this, *self._parse_csv(self._parse_lambda)] 6634 6635 gap_fill = exp.GapFill.from_arg_list(args) 6636 return self.validate_expression(gap_fill, args) 6637 6638 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6639 this = self._parse_assignment() 6640 6641 if not self._match(TokenType.ALIAS): 6642 if self._match(TokenType.COMMA): 6643 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6644 6645 self.raise_error("Expected AS after CAST") 6646 6647 fmt = None 6648 to = self._parse_types() 6649 6650 default = self._match(TokenType.DEFAULT) 6651 if default: 6652 default = self._parse_bitwise() 6653 self._match_text_seq("ON", "CONVERSION", "ERROR") 6654 6655 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6656 fmt_string = self._parse_string() 6657 fmt = self._parse_at_time_zone(fmt_string) 6658 6659 if not to: 6660 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6661 if to.this in exp.DataType.TEMPORAL_TYPES: 6662 this = self.expression( 6663 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6664 this=this, 6665 format=exp.Literal.string( 6666 format_time( 6667 fmt_string.this if fmt_string else "", 6668 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6669 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6670 ) 6671 ), 6672 safe=safe, 6673 ) 6674 6675 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6676 this.set("zone", 
fmt.args["zone"]) 6677 return this 6678 elif not to: 6679 self.raise_error("Expected TYPE after CAST") 6680 elif isinstance(to, exp.Identifier): 6681 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6682 elif to.this == exp.DataType.Type.CHAR: 6683 if self._match(TokenType.CHARACTER_SET): 6684 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6685 6686 return self.build_cast( 6687 strict=strict, 6688 this=this, 6689 to=to, 6690 format=fmt, 6691 safe=safe, 6692 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6693 default=default, 6694 ) 6695 6696 def _parse_string_agg(self) -> exp.GroupConcat: 6697 if self._match(TokenType.DISTINCT): 6698 args: t.List[t.Optional[exp.Expression]] = [ 6699 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6700 ] 6701 if self._match(TokenType.COMMA): 6702 args.extend(self._parse_csv(self._parse_assignment)) 6703 else: 6704 args = self._parse_csv(self._parse_assignment) # type: ignore 6705 6706 if self._match_text_seq("ON", "OVERFLOW"): 6707 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6708 if self._match_text_seq("ERROR"): 6709 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6710 else: 6711 self._match_text_seq("TRUNCATE") 6712 on_overflow = self.expression( 6713 exp.OverflowTruncateBehavior, 6714 this=self._parse_string(), 6715 with_count=( 6716 self._match_text_seq("WITH", "COUNT") 6717 or not self._match_text_seq("WITHOUT", "COUNT") 6718 ), 6719 ) 6720 else: 6721 on_overflow = None 6722 6723 index = self._index 6724 if not self._match(TokenType.R_PAREN) and args: 6725 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6726 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6727 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6728 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6729 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6730 6731 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6732 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6733 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6734 if not self._match_text_seq("WITHIN", "GROUP"): 6735 self._retreat(index) 6736 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6737 6738 # The corresponding match_r_paren will be called in parse_function (caller) 6739 self._match_l_paren() 6740 6741 return self.expression( 6742 exp.GroupConcat, 6743 this=self._parse_order(this=seq_get(args, 0)), 6744 separator=seq_get(args, 1), 6745 on_overflow=on_overflow, 6746 ) 6747 6748 def _parse_convert( 6749 self, strict: bool, safe: t.Optional[bool] = None 6750 ) -> t.Optional[exp.Expression]: 6751 this = self._parse_bitwise() 6752 6753 if self._match(TokenType.USING): 6754 to: t.Optional[exp.Expression] = self.expression( 6755 exp.CharacterSet, this=self._parse_var() 6756 ) 6757 elif self._match(TokenType.COMMA): 6758 to = self._parse_types() 6759 else: 6760 to = None 6761 6762 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6763 6764 def _parse_xml_table(self) -> exp.XMLTable: 6765 namespaces = None 6766 passing = None 6767 columns = None 6768 6769 if self._match_text_seq("XMLNAMESPACES", "("): 6770 namespaces = self._parse_xml_namespace() 6771 self._match_text_seq(")", ",") 6772 6773 this = self._parse_string() 6774 6775 if self._match_text_seq("PASSING"): 6776 # The BY VALUE keywords are optional and are provided for semantic clarity 6777 self._match_text_seq("BY", "VALUE") 6778 passing = self._parse_csv(self._parse_column) 6779 6780 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6781 6782 if self._match_text_seq("COLUMNS"): 6783 columns = self._parse_csv(self._parse_field_def) 6784 6785 return self.expression( 6786 exp.XMLTable, 6787 this=this, 6788 namespaces=namespaces, 6789 passing=passing, 6790 columns=columns, 6791 by_ref=by_ref, 6792 ) 6793 6794 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6795 namespaces = [] 6796 6797 while True: 6798 if self._match(TokenType.DEFAULT): 6799 uri = self._parse_string() 6800 else: 6801 uri = self._parse_alias(self._parse_string()) 6802 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6803 if not self._match(TokenType.COMMA): 6804 break 6805 6806 return namespaces 6807 6808 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6809 args = self._parse_csv(self._parse_assignment) 6810 6811 if len(args) < 3: 6812 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6813 6814 return self.expression(exp.DecodeCase, expressions=args) 6815 6816 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6817 self._match_text_seq("KEY") 6818 key = self._parse_column() 6819 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6820 self._match_text_seq("VALUE") 6821 value = self._parse_bitwise() 6822 6823 if not key and not value: 6824 return None 6825 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6826 6827 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6828 if not this or not self._match_text_seq("FORMAT", "JSON"): 6829 return this 6830 6831 return self.expression(exp.FormatJson, this=this) 6832 6833 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6834 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6835 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6836 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6837 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6838 else: 6839 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6840 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6841 6842 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6843 6844 if not empty and not error and not null: 6845 return None 6846 6847 return self.expression( 6848 exp.OnCondition, 6849 empty=empty, 6850 error=error, 6851 null=null, 6852 ) 6853 6854 def _parse_on_handling( 6855 self, on: str, *values: str 6856 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6857 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6858 for value in values: 6859 if self._match_text_seq(value, "ON", on): 6860 return f"{value} ON {on}" 6861 6862 index = self._index 6863 if self._match(TokenType.DEFAULT): 6864 default_value = self._parse_bitwise() 6865 if self._match_text_seq("ON", on): 6866 return default_value 6867 6868 self._retreat(index) 6869 6870 return None 6871 6872 @t.overload 6873 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6874 6875 @t.overload 6876 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6877 6878 def _parse_json_object(self, agg=False): 6879 star = self._parse_star() 6880 expressions = ( 6881 [star] 6882 if star 6883 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6884 ) 6885 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6886 6887 unique_keys = None 6888 if self._match_text_seq("WITH", "UNIQUE"): 6889 unique_keys = True 6890 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6891 unique_keys = False 6892 6893 self._match_text_seq("KEYS") 6894 6895 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6896 self._parse_type() 6897 ) 6898 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6899 6900 return self.expression( 6901 exp.JSONObjectAgg if agg else exp.JSONObject, 6902 expressions=expressions, 6903 null_handling=null_handling, 6904 unique_keys=unique_keys, 6905 return_type=return_type, 6906 encoding=encoding, 6907 ) 6908 6909 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6910 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6911 if not self._match_text_seq("NESTED"): 6912 this = self._parse_id_var() 6913 kind = self._parse_types(allow_identifiers=False) 6914 nested = None 6915 else: 6916 this = None 6917 kind = None 6918 nested = True 6919 6920 path = self._match_text_seq("PATH") and self._parse_string() 6921 nested_schema = nested and self._parse_json_schema() 6922 6923 return self.expression( 6924 exp.JSONColumnDef, 6925 this=this, 6926 kind=kind, 6927 path=path, 6928 nested_schema=nested_schema, 6929 ) 6930 6931 def _parse_json_schema(self) -> exp.JSONSchema: 6932 self._match_text_seq("COLUMNS") 6933 return self.expression( 6934 exp.JSONSchema, 6935 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6936 ) 6937 6938 def _parse_json_table(self) -> exp.JSONTable: 6939 this = self._parse_format_json(self._parse_bitwise()) 6940 path = self._match(TokenType.COMMA) and self._parse_string() 6941 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6942 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6943 schema = 
self._parse_json_schema() 6944 6945 return exp.JSONTable( 6946 this=this, 6947 schema=schema, 6948 path=path, 6949 error_handling=error_handling, 6950 empty_handling=empty_handling, 6951 ) 6952 6953 def _parse_match_against(self) -> exp.MatchAgainst: 6954 if self._match_text_seq("TABLE"): 6955 # parse SingleStore MATCH(TABLE ...) syntax 6956 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6957 expressions = [] 6958 table = self._parse_table() 6959 if table: 6960 expressions = [table] 6961 else: 6962 expressions = self._parse_csv(self._parse_column) 6963 6964 self._match_text_seq(")", "AGAINST", "(") 6965 6966 this = self._parse_string() 6967 6968 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6969 modifier = "IN NATURAL LANGUAGE MODE" 6970 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6971 modifier = f"{modifier} WITH QUERY EXPANSION" 6972 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6973 modifier = "IN BOOLEAN MODE" 6974 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6975 modifier = "WITH QUERY EXPANSION" 6976 else: 6977 modifier = None 6978 6979 return self.expression( 6980 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6981 ) 6982 6983 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6984 def _parse_open_json(self) -> exp.OpenJSON: 6985 this = self._parse_bitwise() 6986 path = self._match(TokenType.COMMA) and self._parse_string() 6987 6988 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6989 this = self._parse_field(any_token=True) 6990 kind = self._parse_types() 6991 path = self._parse_string() 6992 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6993 6994 return self.expression( 6995 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6996 ) 6997 6998 expressions = None 6999 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7000 self._match_l_paren() 7001 expressions = self._parse_csv(_parse_open_json_column_def) 7002 7003 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7004 7005 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7006 args = self._parse_csv(self._parse_bitwise) 7007 7008 if self._match(TokenType.IN): 7009 return self.expression( 7010 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7011 ) 7012 7013 if haystack_first: 7014 haystack = seq_get(args, 0) 7015 needle = seq_get(args, 1) 7016 else: 7017 haystack = seq_get(args, 1) 7018 needle = seq_get(args, 0) 7019 7020 return self.expression( 7021 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7022 ) 7023 7024 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7025 args = self._parse_csv(self._parse_table) 7026 return exp.JoinHint(this=func_name.upper(), expressions=args) 7027 7028 def _parse_substring(self) -> exp.Substring: 7029 # Postgres supports the form: substring(string [from int] [for int]) 7030 # (despite being undocumented, the reverse order also works) 7031 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7032 7033 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7034 7035 start, length = None, None 7036 7037 while self._curr: 7038 if self._match(TokenType.FROM): 7039 start = self._parse_bitwise() 7040 elif self._match(TokenType.FOR): 7041 if not start: 7042 start = exp.Literal.number(1) 7043 length = self._parse_bitwise() 7044 
else: 7045 break 7046 7047 if start: 7048 args.append(start) 7049 if length: 7050 args.append(length) 7051 7052 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7053 7054 def _parse_trim(self) -> exp.Trim: 7055 # https://www.w3resource.com/sql/character-functions/trim.php 7056 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7057 7058 position = None 7059 collation = None 7060 expression = None 7061 7062 if self._match_texts(self.TRIM_TYPES): 7063 position = self._prev.text.upper() 7064 7065 this = self._parse_bitwise() 7066 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7067 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7068 expression = self._parse_bitwise() 7069 7070 if invert_order: 7071 this, expression = expression, this 7072 7073 if self._match(TokenType.COLLATE): 7074 collation = self._parse_bitwise() 7075 7076 return self.expression( 7077 exp.Trim, this=this, position=position, expression=expression, collation=collation 7078 ) 7079 7080 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7081 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7082 7083 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7084 return self._parse_window(self._parse_id_var(), alias=True) 7085 7086 def _parse_respect_or_ignore_nulls( 7087 self, this: t.Optional[exp.Expression] 7088 ) -> t.Optional[exp.Expression]: 7089 if self._match_text_seq("IGNORE", "NULLS"): 7090 return self.expression(exp.IgnoreNulls, this=this) 7091 if self._match_text_seq("RESPECT", "NULLS"): 7092 return self.expression(exp.RespectNulls, this=this) 7093 return this 7094 7095 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7096 if self._match(TokenType.HAVING): 7097 self._match_texts(("MAX", "MIN")) 7098 max = self._prev.text.upper() != "MIN" 7099 return self.expression( 7100 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7101 ) 7102 7103 return this 7104 7105 def _parse_window( 7106 self, this: t.Optional[exp.Expression], alias: bool = False 7107 ) -> t.Optional[exp.Expression]: 7108 func = this 7109 comments = func.comments if isinstance(func, exp.Expression) else None 7110 7111 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7112 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7113 if self._match_text_seq("WITHIN", "GROUP"): 7114 order = self._parse_wrapped(self._parse_order) 7115 this = self.expression(exp.WithinGroup, this=this, expression=order) 7116 7117 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7118 self._match(TokenType.WHERE) 7119 this = self.expression( 7120 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7121 ) 7122 self._match_r_paren() 7123 7124 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7125 # Some dialects choose to implement and some do not. 7126 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7127 7128 # There is some code above in _parse_lambda that handles 7129 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7130 7131 # The below changes handle 7132 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
7133 7134 # Oracle allows both formats 7135 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7136 # and Snowflake chose to do the same for familiarity 7137 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7138 if isinstance(this, exp.AggFunc): 7139 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7140 7141 if ignore_respect and ignore_respect is not this: 7142 ignore_respect.replace(ignore_respect.this) 7143 this = self.expression(ignore_respect.__class__, this=this) 7144 7145 this = self._parse_respect_or_ignore_nulls(this) 7146 7147 # bigquery select from window x AS (partition by ...) 7148 if alias: 7149 over = None 7150 self._match(TokenType.ALIAS) 7151 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7152 return this 7153 else: 7154 over = self._prev.text.upper() 7155 7156 if comments and isinstance(func, exp.Expression): 7157 func.pop_comments() 7158 7159 if not self._match(TokenType.L_PAREN): 7160 return self.expression( 7161 exp.Window, 7162 comments=comments, 7163 this=this, 7164 alias=self._parse_id_var(False), 7165 over=over, 7166 ) 7167 7168 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7169 7170 first = self._match(TokenType.FIRST) 7171 if self._match_text_seq("LAST"): 7172 first = False 7173 7174 partition, order = self._parse_partition_and_order() 7175 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7176 7177 if kind: 7178 self._match(TokenType.BETWEEN) 7179 start = self._parse_window_spec() 7180 7181 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7182 exclude = ( 7183 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7184 if self._match_text_seq("EXCLUDE") 7185 else None 7186 ) 7187 7188 spec = self.expression( 7189 exp.WindowSpec, 7190 kind=kind, 7191 start=start["value"], 7192 start_side=start["side"], 7193 end=end.get("value"), 7194 end_side=end.get("side"), 7195 exclude=exclude, 7196 ) 7197 else: 7198 spec = None 7199 7200 self._match_r_paren() 7201 7202 window = self.expression( 7203 exp.Window, 7204 comments=comments, 7205 this=this, 7206 partition_by=partition, 7207 order=order, 7208 spec=spec, 7209 alias=window_alias, 7210 over=over, 7211 first=first, 7212 ) 7213 7214 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
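# Illustrative sketch (not part of the upstream source, assuming sqlglot.parse_one):
# chaining KEEP (...) into OVER (...) is handled by the recursive call below, and the
# two IGNORE NULLS placements discussed above are expected to normalize to one tree:
#
#     >>> import sqlglot
#     >>> a = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t")
#     >>> b = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
#     >>> a == b  # expected: True, exp.IgnoreNulls wraps the function inside exp.Window either way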
7215 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7216 return self._parse_window(window, alias=alias) 7217 7218 return window 7219 7220 def _parse_partition_and_order( 7221 self, 7222 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7223 return self._parse_partition_by(), self._parse_order() 7224 7225 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7226 self._match(TokenType.BETWEEN) 7227 7228 return { 7229 "value": ( 7230 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7231 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7232 or self._parse_type() 7233 ), 7234 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7235 } 7236 7237 def _parse_alias( 7238 self, this: t.Optional[exp.Expression], explicit: bool = False 7239 ) -> t.Optional[exp.Expression]: 7240 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7241 # so this section tries to parse the clause version and if it fails, it treats the token 7242 # as an identifier (alias) 7243 if self._can_parse_limit_or_offset(): 7244 return this 7245 7246 any_token = self._match(TokenType.ALIAS) 7247 comments = self._prev_comments or [] 7248 7249 if explicit and not any_token: 7250 return this 7251 7252 if self._match(TokenType.L_PAREN): 7253 aliases = self.expression( 7254 exp.Aliases, 7255 comments=comments, 7256 this=this, 7257 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7258 ) 7259 self._match_r_paren(aliases) 7260 return aliases 7261 7262 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7263 self.STRING_ALIASES and self._parse_string_as_identifier() 7264 ) 7265 7266 if alias: 7267 comments.extend(alias.pop_comments()) 7268 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7269 column = this.this 7270 7271 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7272 if not this.comments and column and column.comments: 7273 this.comments = column.pop_comments() 7274 7275 return this 7276 7277 def _parse_id_var( 7278 self, 7279 any_token: bool = True, 7280 tokens: t.Optional[t.Collection[TokenType]] = None, 7281 ) -> t.Optional[exp.Expression]: 7282 expression = self._parse_identifier() 7283 if not expression and ( 7284 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7285 ): 7286 quoted = self._prev.token_type == TokenType.STRING 7287 expression = self._identifier_expression(quoted=quoted) 7288 7289 return expression 7290 7291 def _parse_string(self) -> t.Optional[exp.Expression]: 7292 if self._match_set(self.STRING_PARSERS): 7293 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7294 return self._parse_placeholder() 7295 7296 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7297 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7298 if output: 7299 output.update_positions(self._prev) 7300 return output 7301 7302 def _parse_number(self) -> t.Optional[exp.Expression]: 7303 if self._match_set(self.NUMERIC_PARSERS): 7304 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7305 return self._parse_placeholder() 7306 7307 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7308 if self._match(TokenType.IDENTIFIER): 7309 return self._identifier_expression(quoted=True) 7310 return self._parse_placeholder() 7311 7312 def _parse_var( 7313 self, 7314 any_token: bool = False, 7315 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7316 upper: bool = False, 7317 ) -> t.Optional[exp.Expression]: 7318 if ( 7319 (any_token and self._advance_any()) 7320 or self._match(TokenType.VAR) 7321 or (self._match_set(tokens) if tokens else False) 7322 ): 7323 return self.expression( 7324 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7325 ) 7326 return self._parse_placeholder() 7327 7328 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7329 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7330 self._advance() 7331 return self._prev 7332 return None 7333 7334 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7335 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7336 7337 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7338 return self._parse_primary() or self._parse_var(any_token=True) 7339 7340 def _parse_null(self) -> t.Optional[exp.Expression]: 7341 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7342 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7343 return self._parse_placeholder() 7344 7345 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7346 if self._match(TokenType.TRUE): 7347 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7348 if self._match(TokenType.FALSE): 7349 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7350 return self._parse_placeholder() 7351 7352 def _parse_star(self) -> t.Optional[exp.Expression]: 7353 if self._match(TokenType.STAR): 7354 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7355 return self._parse_placeholder() 7356 7357 def _parse_parameter(self) -> exp.Parameter: 7358 this = self._parse_identifier() or self._parse_primary_or_var() 7359 return self.expression(exp.Parameter, this=this) 7360 7361 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7362 if self._match_set(self.PLACEHOLDER_PARSERS): 7363 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7364 if placeholder: 7365 return placeholder 7366 self._advance(-1) 7367 return None 7368 7369 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7370 if not self._match_texts(keywords): 7371 return None 7372 if self._match(TokenType.L_PAREN, advance=False): 7373 return self._parse_wrapped_csv(self._parse_expression) 7374 7375 expression = self._parse_alias(self._parse_assignment(), explicit=True) 7376 return [expression] if expression else None 7377 7378 def _parse_csv( 7379 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7380 ) -> t.List[exp.Expression]: 7381 parse_result = parse_method() 7382 items = [parse_result] if parse_result is not None else [] 7383 7384 while self._match(sep): 7385 self._add_comments(parse_result) 7386 parse_result = parse_method() 7387 if parse_result is not None: 7388 items.append(parse_result) 7389 7390 return items 7391 7392 def _parse_tokens( 7393 self, parse_method: t.Callable, expressions: t.Dict 7394 ) -> t.Optional[exp.Expression]: 7395 this = parse_method() 7396 7397 while self._match_set(expressions): 7398 this = self.expression( 7399 expressions[self._prev.token_type], 7400 this=this, 7401 comments=self._prev_comments, 7402 expression=parse_method(), 7403 ) 7404 7405 return this 7406 7407 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7408 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7409 7410 def 
_parse_wrapped_csv( 7411 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7412 ) -> t.List[exp.Expression]: 7413 return self._parse_wrapped( 7414 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7415 ) 7416 7417 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7418 wrapped = self._match(TokenType.L_PAREN) 7419 if not wrapped and not optional: 7420 self.raise_error("Expecting (") 7421 parse_result = parse_method() 7422 if wrapped: 7423 self._match_r_paren() 7424 return parse_result 7425 7426 def _parse_expressions(self) -> t.List[exp.Expression]: 7427 return self._parse_csv(self._parse_expression) 7428 7429 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7430 return ( 7431 self._parse_set_operations( 7432 self._parse_alias(self._parse_assignment(), explicit=True) 7433 if alias 7434 else self._parse_assignment() 7435 ) 7436 or self._parse_select() 7437 ) 7438 7439 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7440 return self._parse_query_modifiers( 7441 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7442 ) 7443 7444 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7445 this = None 7446 if self._match_texts(self.TRANSACTION_KIND): 7447 this = self._prev.text 7448 7449 self._match_texts(("TRANSACTION", "WORK")) 7450 7451 modes = [] 7452 while True: 7453 mode = [] 7454 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7455 mode.append(self._prev.text) 7456 7457 if mode: 7458 modes.append(" ".join(mode)) 7459 if not self._match(TokenType.COMMA): 7460 break 7461 7462 return self.expression(exp.Transaction, this=this, modes=modes) 7463 7464 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7465 chain = None 7466 savepoint = None 7467 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7468 7469 self._match_texts(("TRANSACTION", "WORK")) 7470 7471 if self._match_text_seq("TO"): 7472 self._match_text_seq("SAVEPOINT") 7473 savepoint = self._parse_id_var() 7474 7475 if self._match(TokenType.AND): 7476 chain = not self._match_text_seq("NO") 7477 self._match_text_seq("CHAIN") 7478 7479 if is_rollback: 7480 return self.expression(exp.Rollback, savepoint=savepoint) 7481 7482 return self.expression(exp.Commit, chain=chain) 7483 7484 def _parse_refresh(self) -> exp.Refresh: 7485 self._match(TokenType.TABLE) 7486 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7487 7488 def _parse_column_def_with_exists(self): 7489 start = self._index 7490 self._match(TokenType.COLUMN) 7491 7492 exists_column = self._parse_exists(not_=True) 7493 expression = self._parse_field_def() 7494 7495 if not isinstance(expression, exp.ColumnDef): 7496 self._retreat(start) 7497 return None 7498 7499 expression.set("exists", exists_column) 7500 7501 return expression 7502 7503 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7504 if not self._prev.text.upper() == "ADD": 7505 return None 7506 7507 expression = self._parse_column_def_with_exists() 7508 if not expression: 7509 return None 7510 7511 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7512 if self._match_texts(("FIRST", "AFTER")): 7513 position = self._prev.text 7514 column_position = self.expression( 7515 exp.ColumnPosition, this=self._parse_column(), position=position 7516 ) 7517 expression.set("position", column_position) 7518 7519 return 
expression 7520 7521 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7522 drop = self._match(TokenType.DROP) and self._parse_drop() 7523 if drop and not isinstance(drop, exp.Command): 7524 drop.set("kind", drop.args.get("kind", "COLUMN")) 7525 return drop 7526 7527 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7528 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7529 return self.expression( 7530 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7531 ) 7532 7533 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7534 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7535 self._match_text_seq("ADD") 7536 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7537 return self.expression( 7538 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7539 ) 7540 7541 column_def = self._parse_add_column() 7542 if isinstance(column_def, exp.ColumnDef): 7543 return column_def 7544 7545 exists = self._parse_exists(not_=True) 7546 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7547 return self.expression( 7548 exp.AddPartition, 7549 exists=exists, 7550 this=self._parse_field(any_token=True), 7551 location=self._match_text_seq("LOCATION", advance=False) 7552 and self._parse_property(), 7553 ) 7554 7555 return None 7556 7557 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7558 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7559 or self._match_text_seq("COLUMNS") 7560 ): 7561 schema = self._parse_schema() 7562 7563 return ( 7564 ensure_list(schema) 7565 if schema 7566 else self._parse_csv(self._parse_column_def_with_exists) 7567 ) 7568 7569 return self._parse_csv(_parse_add_alteration) 7570 7571 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7572 if self._match_texts(self.ALTER_ALTER_PARSERS): 7573 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7574 7575 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7576 # keyword after ALTER we default to parsing this statement 7577 self._match(TokenType.COLUMN) 7578 column = self._parse_field(any_token=True) 7579 7580 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7581 return self.expression(exp.AlterColumn, this=column, drop=True) 7582 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7583 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7584 if self._match(TokenType.COMMENT): 7585 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7586 if self._match_text_seq("DROP", "NOT", "NULL"): 7587 return self.expression( 7588 exp.AlterColumn, 7589 this=column, 7590 drop=True, 7591 allow_null=True, 7592 ) 7593 if self._match_text_seq("SET", "NOT", "NULL"): 7594 return self.expression( 7595 exp.AlterColumn, 7596 this=column, 7597 allow_null=False, 7598 ) 7599 7600 if self._match_text_seq("SET", "VISIBLE"): 7601 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7602 if self._match_text_seq("SET", "INVISIBLE"): 7603 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7604 7605 self._match_text_seq("SET", "DATA") 7606 self._match_text_seq("TYPE") 7607 return self.expression( 7608 exp.AlterColumn, 7609 this=column, 7610 dtype=self._parse_types(), 7611 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7612 using=self._match(TokenType.USING) and 
self._parse_assignment(), 7613 ) 7614 7615 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7616 if self._match_texts(("ALL", "EVEN", "AUTO")): 7617 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7618 7619 self._match_text_seq("KEY", "DISTKEY") 7620 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7621 7622 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7623 if compound: 7624 self._match_text_seq("SORTKEY") 7625 7626 if self._match(TokenType.L_PAREN, advance=False): 7627 return self.expression( 7628 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7629 ) 7630 7631 self._match_texts(("AUTO", "NONE")) 7632 return self.expression( 7633 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7634 ) 7635 7636 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7637 index = self._index - 1 7638 7639 partition_exists = self._parse_exists() 7640 if self._match(TokenType.PARTITION, advance=False): 7641 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7642 7643 self._retreat(index) 7644 return self._parse_csv(self._parse_drop_column) 7645 7646 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7647 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7648 exists = self._parse_exists() 7649 old_column = self._parse_column() 7650 to = self._match_text_seq("TO") 7651 new_column = self._parse_column() 7652 7653 if old_column is None or to is None or new_column is None: 7654 return None 7655 7656 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7657 7658 self._match_text_seq("TO") 7659 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7660 7661 def _parse_alter_table_set(self) -> exp.AlterSet: 7662 alter_set = self.expression(exp.AlterSet) 7663 7664 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7665 "TABLE", "PROPERTIES" 7666 ): 7667 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7668 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7669 alter_set.set("expressions", [self._parse_assignment()]) 7670 elif self._match_texts(("LOGGED", "UNLOGGED")): 7671 alter_set.set("option", exp.var(self._prev.text.upper())) 7672 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7673 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7674 elif self._match_text_seq("LOCATION"): 7675 alter_set.set("location", self._parse_field()) 7676 elif self._match_text_seq("ACCESS", "METHOD"): 7677 alter_set.set("access_method", self._parse_field()) 7678 elif self._match_text_seq("TABLESPACE"): 7679 alter_set.set("tablespace", self._parse_field()) 7680 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7681 alter_set.set("file_format", [self._parse_field()]) 7682 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7683 alter_set.set("file_format", self._parse_wrapped_options()) 7684 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7685 alter_set.set("copy_options", self._parse_wrapped_options()) 7686 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7687 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7688 else: 7689 if self._match_text_seq("SERDE"): 7690 alter_set.set("serde", self._parse_field()) 7691 7692 properties = 
self._parse_wrapped(self._parse_properties, optional=True) 7693 alter_set.set("expressions", [properties]) 7694 7695 return alter_set 7696 7697 def _parse_alter_session(self) -> exp.AlterSession: 7698 """Parse ALTER SESSION SET/UNSET statements.""" 7699 if self._match(TokenType.SET): 7700 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7701 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7702 7703 self._match_text_seq("UNSET") 7704 expressions = self._parse_csv( 7705 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7706 ) 7707 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7708 7709 def _parse_alter(self) -> exp.Alter | exp.Command: 7710 start = self._prev 7711 7712 alter_token = self._match_set(self.ALTERABLES) and self._prev 7713 if not alter_token: 7714 return self._parse_as_command(start) 7715 7716 exists = self._parse_exists() 7717 only = self._match_text_seq("ONLY") 7718 7719 if alter_token.token_type == TokenType.SESSION: 7720 this = None 7721 check = None 7722 cluster = None 7723 else: 7724 this = self._parse_table(schema=True) 7725 check = self._match_text_seq("WITH", "CHECK") 7726 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7727 7728 if self._next: 7729 self._advance() 7730 7731 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7732 if parser: 7733 actions = ensure_list(parser(self)) 7734 not_valid = self._match_text_seq("NOT", "VALID") 7735 options = self._parse_csv(self._parse_property) 7736 7737 if not self._curr and actions: 7738 return self.expression( 7739 exp.Alter, 7740 this=this, 7741 kind=alter_token.text.upper(), 7742 exists=exists, 7743 actions=actions, 7744 only=only, 7745 options=options, 7746 cluster=cluster, 7747 not_valid=not_valid, 7748 check=check, 7749 ) 7750 7751 return self._parse_as_command(start) 7752 7753 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7754 start = self._prev 7755 # https://duckdb.org/docs/sql/statements/analyze 7756 if not self._curr: 7757 return self.expression(exp.Analyze) 7758 7759 options = [] 7760 while self._match_texts(self.ANALYZE_STYLES): 7761 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7762 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7763 else: 7764 options.append(self._prev.text.upper()) 7765 7766 this: t.Optional[exp.Expression] = None 7767 inner_expression: t.Optional[exp.Expression] = None 7768 7769 kind = self._curr and self._curr.text.upper() 7770 7771 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7772 this = self._parse_table_parts() 7773 elif self._match_text_seq("TABLES"): 7774 if self._match_set((TokenType.FROM, TokenType.IN)): 7775 kind = f"{kind} {self._prev.text.upper()}" 7776 this = self._parse_table(schema=True, is_db_reference=True) 7777 elif self._match_text_seq("DATABASE"): 7778 this = self._parse_table(schema=True, is_db_reference=True) 7779 elif self._match_text_seq("CLUSTER"): 7780 this = self._parse_table() 7781 # Try matching inner expr keywords before fallback to parse table. 
7782 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7783 kind = None 7784 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7785 else: 7786 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7787 kind = None 7788 this = self._parse_table_parts() 7789 7790 partition = self._try_parse(self._parse_partition) 7791 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7792 return self._parse_as_command(start) 7793 7794 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7795 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7796 "WITH", "ASYNC", "MODE" 7797 ): 7798 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7799 else: 7800 mode = None 7801 7802 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7803 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7804 7805 properties = self._parse_properties() 7806 return self.expression( 7807 exp.Analyze, 7808 kind=kind, 7809 this=this, 7810 mode=mode, 7811 partition=partition, 7812 properties=properties, 7813 expression=inner_expression, 7814 options=options, 7815 ) 7816 7817 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7818 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7819 this = None 7820 kind = self._prev.text.upper() 7821 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7822 expressions = [] 7823 7824 if not self._match_text_seq("STATISTICS"): 7825 self.raise_error("Expecting token STATISTICS") 7826 7827 if self._match_text_seq("NOSCAN"): 7828 this = "NOSCAN" 7829 elif self._match(TokenType.FOR): 7830 if self._match_text_seq("ALL", "COLUMNS"): 7831 this = "FOR ALL COLUMNS" 7832 if self._match_texts("COLUMNS"): 7833 this = "FOR COLUMNS" 7834 expressions = self._parse_csv(self._parse_column_reference) 7835 elif self._match_text_seq("SAMPLE"): 7836 sample = self._parse_number() 7837 expressions = [ 7838 self.expression( 7839 exp.AnalyzeSample, 7840 sample=sample, 7841 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7842 ) 7843 ] 7844 7845 return self.expression( 7846 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7847 ) 7848 7849 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7850 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7851 kind = None 7852 this = None 7853 expression: t.Optional[exp.Expression] = None 7854 if self._match_text_seq("REF", "UPDATE"): 7855 kind = "REF" 7856 this = "UPDATE" 7857 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7858 this = "UPDATE SET DANGLING TO NULL" 7859 elif self._match_text_seq("STRUCTURE"): 7860 kind = "STRUCTURE" 7861 if self._match_text_seq("CASCADE", "FAST"): 7862 this = "CASCADE FAST" 7863 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7864 ("ONLINE", "OFFLINE") 7865 ): 7866 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7867 expression = self._parse_into() 7868 7869 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7870 7871 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7872 this = self._prev.text.upper() 7873 if self._match_text_seq("COLUMNS"): 7874 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7875 return None 7876 7877 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7878 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7879 if self._match_text_seq("STATISTICS"): 7880 return self.expression(exp.AnalyzeDelete, kind=kind) 7881 return None 7882 7883 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7884 if self._match_text_seq("CHAINED", "ROWS"): 7885 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7886 return None 7887 7888 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7889 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7890 this = self._prev.text.upper() 7891 expression: t.Optional[exp.Expression] = None 7892 expressions = [] 7893 update_options = None 7894 7895 if self._match_text_seq("HISTOGRAM", "ON"): 7896 expressions = self._parse_csv(self._parse_column_reference) 7897 with_expressions = [] 7898 while self._match(TokenType.WITH): 7899 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7900 if self._match_texts(("SYNC", "ASYNC")): 7901 if self._match_text_seq("MODE", advance=False): 7902 with_expressions.append(f"{self._prev.text.upper()} MODE") 7903 self._advance() 7904 else: 7905 buckets = self._parse_number() 7906 if self._match_text_seq("BUCKETS"): 7907 with_expressions.append(f"{buckets} BUCKETS") 7908 if with_expressions: 7909 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7910 7911 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7912 TokenType.UPDATE, advance=False 7913 ): 7914 update_options = self._prev.text.upper() 7915 self._advance() 7916 elif self._match_text_seq("USING", "DATA"): 7917 expression = self.expression(exp.UsingData, this=self._parse_string()) 7918 7919 return self.expression( 7920 exp.AnalyzeHistogram, 7921 this=this, 7922 expressions=expressions, 7923 expression=expression, 7924 update_options=update_options, 7925 ) 7926 7927 def _parse_merge(self) -> exp.Merge: 7928 self._match(TokenType.INTO) 7929 target = self._parse_table() 7930 7931 if target and self._match(TokenType.ALIAS, advance=False): 7932 target.set("alias", self._parse_table_alias()) 7933 7934 self._match(TokenType.USING) 7935 using = self._parse_table() 7936 7937 self._match(TokenType.ON) 7938 on = self._parse_assignment() 7939 7940 return self.expression( 7941 exp.Merge, 7942 this=target, 7943 using=using, 7944 on=on, 7945 whens=self._parse_when_matched(), 7946 returning=self._parse_returning(), 7947 ) 7948 7949 def _parse_when_matched(self) -> exp.Whens: 7950 whens = [] 7951 7952 while self._match(TokenType.WHEN): 7953 matched = not self._match(TokenType.NOT) 7954 self._match_text_seq("MATCHED") 7955 source = ( 7956 False 7957 if self._match_text_seq("BY", "TARGET") 7958 else self._match_text_seq("BY", "SOURCE") 7959 ) 7960 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7961 7962 self._match(TokenType.THEN) 7963 7964 if self._match(TokenType.INSERT): 7965 this = self._parse_star() 7966 if this: 7967 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7968 else: 7969 then = self.expression( 7970 exp.Insert, 7971 this=exp.var("ROW") 7972 if self._match_text_seq("ROW") 7973 else self._parse_value(values=False), 7974 expression=self._match_text_seq("VALUES") and self._parse_value(), 7975 ) 7976 elif self._match(TokenType.UPDATE): 7977 expressions = self._parse_star() 7978 if expressions: 7979 then = self.expression(exp.Update, expressions=expressions) 7980 else: 7981 then = self.expression( 7982 exp.Update, 7983 
expressions=self._match(TokenType.SET) 7984 and self._parse_csv(self._parse_equality), 7985 ) 7986 elif self._match(TokenType.DELETE): 7987 then = self.expression(exp.Var, this=self._prev.text) 7988 else: 7989 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7990 7991 whens.append( 7992 self.expression( 7993 exp.When, 7994 matched=matched, 7995 source=source, 7996 condition=condition, 7997 then=then, 7998 ) 7999 ) 8000 return self.expression(exp.Whens, expressions=whens) 8001 8002 def _parse_show(self) -> t.Optional[exp.Expression]: 8003 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8004 if parser: 8005 return parser(self) 8006 return self._parse_as_command(self._prev) 8007 8008 def _parse_set_item_assignment( 8009 self, kind: t.Optional[str] = None 8010 ) -> t.Optional[exp.Expression]: 8011 index = self._index 8012 8013 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8014 return self._parse_set_transaction(global_=kind == "GLOBAL") 8015 8016 left = self._parse_primary() or self._parse_column() 8017 assignment_delimiter = self._match_texts(("=", "TO")) 8018 8019 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8020 self._retreat(index) 8021 return None 8022 8023 right = self._parse_statement() or self._parse_id_var() 8024 if isinstance(right, (exp.Column, exp.Identifier)): 8025 right = exp.var(right.name) 8026 8027 this = self.expression(exp.EQ, this=left, expression=right) 8028 return self.expression(exp.SetItem, this=this, kind=kind) 8029 8030 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8031 self._match_text_seq("TRANSACTION") 8032 characteristics = self._parse_csv( 8033 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8034 ) 8035 return self.expression( 8036 exp.SetItem, 8037 expressions=characteristics, 8038 kind="TRANSACTION", 8039 **{"global": global_}, # type: ignore 8040 ) 8041 8042 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8043 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8044 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8045 8046 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8047 index = self._index 8048 set_ = self.expression( 8049 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8050 ) 8051 8052 if self._curr: 8053 self._retreat(index) 8054 return self._parse_as_command(self._prev) 8055 8056 return set_ 8057 8058 def _parse_var_from_options( 8059 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8060 ) -> t.Optional[exp.Var]: 8061 start = self._curr 8062 if not start: 8063 return None 8064 8065 option = start.text.upper() 8066 continuations = options.get(option) 8067 8068 index = self._index 8069 self._advance() 8070 for keywords in continuations or []: 8071 if isinstance(keywords, str): 8072 keywords = (keywords,) 8073 8074 if self._match_text_seq(*keywords): 8075 option = f"{option} {' '.join(keywords)}" 8076 break 8077 else: 8078 if continuations or continuations is None: 8079 if raise_unmatched: 8080 self.raise_error(f"Unknown option {option}") 8081 8082 self._retreat(index) 8083 return None 8084 8085 return exp.var(option) 8086 8087 def _parse_as_command(self, start: Token) -> exp.Command: 8088 while self._curr: 8089 self._advance() 8090 text = self._find_sql(start, self._prev) 8091 size = len(start.text) 8092 self._warn_unsupported() 8093 return exp.Command(this=text[:size], 
expression=text[size:]) 8094 8095 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8096 settings = [] 8097 8098 self._match_l_paren() 8099 kind = self._parse_id_var() 8100 8101 if self._match(TokenType.L_PAREN): 8102 while True: 8103 key = self._parse_id_var() 8104 value = self._parse_primary() 8105 if not key and value is None: 8106 break 8107 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8108 self._match(TokenType.R_PAREN) 8109 8110 self._match_r_paren() 8111 8112 return self.expression( 8113 exp.DictProperty, 8114 this=this, 8115 kind=kind.this if kind else None, 8116 settings=settings, 8117 ) 8118 8119 def _parse_dict_range(self, this: str) -> exp.DictRange: 8120 self._match_l_paren() 8121 has_min = self._match_text_seq("MIN") 8122 if has_min: 8123 min = self._parse_var() or self._parse_primary() 8124 self._match_text_seq("MAX") 8125 max = self._parse_var() or self._parse_primary() 8126 else: 8127 max = self._parse_var() or self._parse_primary() 8128 min = exp.Literal.number(0) 8129 self._match_r_paren() 8130 return self.expression(exp.DictRange, this=this, min=min, max=max) 8131 8132 def _parse_comprehension( 8133 self, this: t.Optional[exp.Expression] 8134 ) -> t.Optional[exp.Comprehension]: 8135 index = self._index 8136 expression = self._parse_column() 8137 if not self._match(TokenType.IN): 8138 self._retreat(index - 1) 8139 return None 8140 iterator = self._parse_column() 8141 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8142 return self.expression( 8143 exp.Comprehension, 8144 this=this, 8145 expression=expression, 8146 iterator=iterator, 8147 condition=condition, 8148 ) 8149 8150 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8151 if self._match(TokenType.HEREDOC_STRING): 8152 return self.expression(exp.Heredoc, this=self._prev.text) 8153 8154 if not self._match_text_seq("$"): 8155 return None 8156 8157 tags = ["$"] 8158 tag_text = None 8159 8160 if self._is_connected(): 8161 self._advance() 8162 tags.append(self._prev.text.upper()) 8163 else: 8164 self.raise_error("No closing $ found") 8165 8166 if tags[-1] != "$": 8167 if self._is_connected() and self._match_text_seq("$"): 8168 tag_text = tags[-1] 8169 tags.append("$") 8170 else: 8171 self.raise_error("No closing $ found") 8172 8173 heredoc_start = self._curr 8174 8175 while self._curr: 8176 if self._match_text_seq(*tags, advance=False): 8177 this = self._find_sql(heredoc_start, self._prev) 8178 self._advance(len(tags)) 8179 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8180 8181 self._advance() 8182 8183 self.raise_error(f"No closing {''.join(tags)} found") 8184 return None 8185 8186 def _find_parser( 8187 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8188 ) -> t.Optional[t.Callable]: 8189 if not self._curr: 8190 return None 8191 8192 index = self._index 8193 this = [] 8194 while True: 8195 # The current token might be multiple words 8196 curr = self._curr.text.upper() 8197 key = curr.split(" ") 8198 this.append(curr) 8199 8200 self._advance() 8201 result, trie = in_trie(trie, key) 8202 if result == TrieResult.FAILED: 8203 break 8204 8205 if result == TrieResult.EXISTS: 8206 subparser = parsers[" ".join(this)] 8207 return subparser 8208 8209 self._retreat(index) 8210 return None 8211 8212 def _match(self, token_type, advance=True, expression=None): 8213 if not self._curr: 8214 return None 8215 8216 if self._curr.token_type == token_type: 8217 if advance: 8218 self._advance() 8219 self._add_comments(expression) 8220 return 
True 8221 8222 return None 8223 8224 def _match_set(self, types, advance=True): 8225 if not self._curr: 8226 return None 8227 8228 if self._curr.token_type in types: 8229 if advance: 8230 self._advance() 8231 return True 8232 8233 return None 8234 8235 def _match_pair(self, token_type_a, token_type_b, advance=True): 8236 if not self._curr or not self._next: 8237 return None 8238 8239 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8240 if advance: 8241 self._advance(2) 8242 return True 8243 8244 return None 8245 8246 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8247 if not self._match(TokenType.L_PAREN, expression=expression): 8248 self.raise_error("Expecting (") 8249 8250 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8251 if not self._match(TokenType.R_PAREN, expression=expression): 8252 self.raise_error("Expecting )") 8253 8254 def _match_texts(self, texts, advance=True): 8255 if ( 8256 self._curr 8257 and self._curr.token_type != TokenType.STRING 8258 and self._curr.text.upper() in texts 8259 ): 8260 if advance: 8261 self._advance() 8262 return True 8263 return None 8264 8265 def _match_text_seq(self, *texts, advance=True): 8266 index = self._index 8267 for text in texts: 8268 if ( 8269 self._curr 8270 and self._curr.token_type != TokenType.STRING 8271 and self._curr.text.upper() == text 8272 ): 8273 self._advance() 8274 else: 8275 self._retreat(index) 8276 return None 8277 8278 if not advance: 8279 self._retreat(index) 8280 8281 return True 8282 8283 def _replace_lambda( 8284 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8285 ) -> t.Optional[exp.Expression]: 8286 if not node: 8287 return node 8288 8289 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8290 8291 for column in node.find_all(exp.Column): 8292 typ = lambda_types.get(column.parts[0].name) 8293 if typ is not None: 8294 dot_or_id = column.to_dot() if column.table else column.this 8295 8296 if typ: 8297 dot_or_id = self.expression( 8298 exp.Cast, 8299 this=dot_or_id, 8300 to=typ, 8301 ) 8302 8303 parent = column.parent 8304 8305 while isinstance(parent, exp.Dot): 8306 if not isinstance(parent.parent, exp.Dot): 8307 parent.replace(dot_or_id) 8308 break 8309 parent = parent.parent 8310 else: 8311 if column is node: 8312 node = dot_or_id 8313 else: 8314 column.replace(dot_or_id) 8315 return node 8316 8317 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8318 start = self._prev 8319 8320 # Not to be confused with TRUNCATE(number, decimals) function call 8321 if self._match(TokenType.L_PAREN): 8322 self._retreat(self._index - 2) 8323 return self._parse_function() 8324 8325 # Clickhouse supports TRUNCATE DATABASE as well 8326 is_database = self._match(TokenType.DATABASE) 8327 8328 self._match(TokenType.TABLE) 8329 8330 exists = self._parse_exists(not_=False) 8331 8332 expressions = self._parse_csv( 8333 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8334 ) 8335 8336 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8337 8338 if self._match_text_seq("RESTART", "IDENTITY"): 8339 identity = "RESTART" 8340 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8341 identity = "CONTINUE" 8342 else: 8343 identity = None 8344 8345 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8346 option = self._prev.text 8347 else: 8348 option = None 8349 8350 partition = self._parse_partition() 
8351 8352 # Fallback case 8353 if self._curr: 8354 return self._parse_as_command(start) 8355 8356 return self.expression( 8357 exp.TruncateTable, 8358 expressions=expressions, 8359 is_database=is_database, 8360 exists=exists, 8361 cluster=cluster, 8362 identity=identity, 8363 option=option, 8364 partition=partition, 8365 ) 8366 8367 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8368 this = self._parse_ordered(self._parse_opclass) 8369 8370 if not self._match(TokenType.WITH): 8371 return this 8372 8373 op = self._parse_var(any_token=True) 8374 8375 return self.expression(exp.WithOperator, this=this, op=op) 8376 8377 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8378 self._match(TokenType.EQ) 8379 self._match(TokenType.L_PAREN) 8380 8381 opts: t.List[t.Optional[exp.Expression]] = [] 8382 option: exp.Expression | None 8383 while self._curr and not self._match(TokenType.R_PAREN): 8384 if self._match_text_seq("FORMAT_NAME", "="): 8385 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8386 option = self._parse_format_name() 8387 else: 8388 option = self._parse_property() 8389 8390 if option is None: 8391 self.raise_error("Unable to parse option") 8392 break 8393 8394 opts.append(option) 8395 8396 return opts 8397 8398 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8399 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8400 8401 options = [] 8402 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8403 option = self._parse_var(any_token=True) 8404 prev = self._prev.text.upper() 8405 8406 # Different dialects might separate options and values by white space, "=" and "AS" 8407 self._match(TokenType.EQ) 8408 self._match(TokenType.ALIAS) 8409 8410 param = self.expression(exp.CopyParameter, this=option) 8411 8412 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8413 TokenType.L_PAREN, advance=False 8414 ): 8415 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8416 param.set("expressions", self._parse_wrapped_options()) 8417 elif prev == "FILE_FORMAT": 8418 # T-SQL's external file format case 8419 param.set("expression", self._parse_field()) 8420 else: 8421 param.set("expression", self._parse_unquoted_field()) 8422 8423 options.append(param) 8424 self._match(sep) 8425 8426 return options 8427 8428 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8429 expr = self.expression(exp.Credentials) 8430 8431 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8432 expr.set("storage", self._parse_field()) 8433 if self._match_text_seq("CREDENTIALS"): 8434 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8435 creds = ( 8436 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8437 ) 8438 expr.set("credentials", creds) 8439 if self._match_text_seq("ENCRYPTION"): 8440 expr.set("encryption", self._parse_wrapped_options()) 8441 if self._match_text_seq("IAM_ROLE"): 8442 expr.set("iam_role", self._parse_field()) 8443 if self._match_text_seq("REGION"): 8444 expr.set("region", self._parse_field()) 8445 8446 return expr 8447 8448 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8449 return self._parse_field() 8450 8451 def _parse_copy(self) -> exp.Copy | exp.Command: 8452 start = self._prev 8453 8454 self._match(TokenType.INTO) 8455 8456 this = ( 8457 self._parse_select(nested=True, parse_subquery_alias=False) 8458 if self._match(TokenType.L_PAREN, advance=False) 8459 else self._parse_table(schema=True) 
8460 ) 8461 8462 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8463 8464 files = self._parse_csv(self._parse_file_location) 8465 if self._match(TokenType.EQ, advance=False): 8466 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8467 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8468 # list via `_parse_wrapped(..)` below. 8469 self._advance(-1) 8470 files = [] 8471 8472 credentials = self._parse_credentials() 8473 8474 self._match_text_seq("WITH") 8475 8476 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8477 8478 # Fallback case 8479 if self._curr: 8480 return self._parse_as_command(start) 8481 8482 return self.expression( 8483 exp.Copy, 8484 this=this, 8485 kind=kind, 8486 credentials=credentials, 8487 files=files, 8488 params=params, 8489 ) 8490 8491 def _parse_normalize(self) -> exp.Normalize: 8492 return self.expression( 8493 exp.Normalize, 8494 this=self._parse_bitwise(), 8495 form=self._match(TokenType.COMMA) and self._parse_var(), 8496 ) 8497 8498 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8499 args = self._parse_csv(lambda: self._parse_lambda()) 8500 8501 this = seq_get(args, 0) 8502 decimals = seq_get(args, 1) 8503 8504 return expr_type( 8505 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8506 ) 8507 8508 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8509 star_token = self._prev 8510 8511 if self._match_text_seq("COLUMNS", "(", advance=False): 8512 this = self._parse_function() 8513 if isinstance(this, exp.Columns): 8514 this.set("unpack", True) 8515 return this 8516 8517 return self.expression( 8518 exp.Star, 8519 **{ # type: ignore 8520 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8521 "replace": self._parse_star_op("REPLACE"), 8522 "rename": self._parse_star_op("RENAME"), 8523 }, 8524 ).update_positions(star_token) 8525 8526 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8527 privilege_parts = [] 8528 8529 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8530 # (end of privilege list) or L_PAREN (start of column list) are met 8531 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8532 privilege_parts.append(self._curr.text.upper()) 8533 self._advance() 8534 8535 this = exp.var(" ".join(privilege_parts)) 8536 expressions = ( 8537 self._parse_wrapped_csv(self._parse_column) 8538 if self._match(TokenType.L_PAREN, advance=False) 8539 else None 8540 ) 8541 8542 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8543 8544 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8545 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8546 principal = self._parse_id_var() 8547 8548 if not principal: 8549 return None 8550 8551 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8552 8553 def _parse_grant_revoke_common( 8554 self, 8555 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8556 privileges = self._parse_csv(self._parse_grant_privilege) 8557 8558 self._match(TokenType.ON) 8559 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8560 8561 # Attempt to parse the securable e.g. 
MySQL allows names 8562 # such as "foo.*", "*.*" which are not easily parseable yet 8563 securable = self._try_parse(self._parse_table_parts) 8564 8565 return privileges, kind, securable 8566 8567 def _parse_grant(self) -> exp.Grant | exp.Command: 8568 start = self._prev 8569 8570 privileges, kind, securable = self._parse_grant_revoke_common() 8571 8572 if not securable or not self._match_text_seq("TO"): 8573 return self._parse_as_command(start) 8574 8575 principals = self._parse_csv(self._parse_grant_principal) 8576 8577 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8578 8579 if self._curr: 8580 return self._parse_as_command(start) 8581 8582 return self.expression( 8583 exp.Grant, 8584 privileges=privileges, 8585 kind=kind, 8586 securable=securable, 8587 principals=principals, 8588 grant_option=grant_option, 8589 ) 8590 8591 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8592 start = self._prev 8593 8594 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8595 8596 privileges, kind, securable = self._parse_grant_revoke_common() 8597 8598 if not securable or not self._match_text_seq("FROM"): 8599 return self._parse_as_command(start) 8600 8601 principals = self._parse_csv(self._parse_grant_principal) 8602 8603 cascade = None 8604 if self._match_texts(("CASCADE", "RESTRICT")): 8605 cascade = self._prev.text.upper() 8606 8607 if self._curr: 8608 return self._parse_as_command(start) 8609 8610 return self.expression( 8611 exp.Revoke, 8612 privileges=privileges, 8613 kind=kind, 8614 securable=securable, 8615 principals=principals, 8616 grant_option=grant_option, 8617 cascade=cascade, 8618 ) 8619 8620 def _parse_overlay(self) -> exp.Overlay: 8621 return self.expression( 8622 exp.Overlay, 8623 **{ # type: ignore 8624 "this": self._parse_bitwise(), 8625 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8626 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8627 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8628 }, 8629 ) 8630 8631 def _parse_format_name(self) -> exp.Property: 8632 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8633 # for FILE_FORMAT = <format_name> 8634 return self.expression( 8635 exp.Property, 8636 this=exp.var("FORMAT_NAME"), 8637 value=self._parse_string() or self._parse_table_parts(), 8638 ) 8639 8640 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8641 args: t.List[exp.Expression] = [] 8642 8643 if self._match(TokenType.DISTINCT): 8644 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8645 self._match(TokenType.COMMA) 8646 8647 args.extend(self._parse_csv(self._parse_assignment)) 8648 8649 return self.expression( 8650 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8651 ) 8652 8653 def _identifier_expression( 8654 self, token: t.Optional[Token] = None, **kwargs: t.Any 8655 ) -> exp.Identifier: 8656 token = token or self._prev 8657 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8658 expression.update_positions(token) 8659 return expression 8660 8661 def _build_pipe_cte( 8662 self, 8663 query: exp.Query, 8664 expressions: t.List[exp.Expression], 8665 alias_cte: t.Optional[exp.TableAlias] = None, 8666 ) -> exp.Select: 8667 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8668 if alias_cte: 8669 new_cte = alias_cte 8670 else: 8671 self._pipe_cte_counter += 1 8672 new_cte = f"__tmp{self._pipe_cte_counter}" 8673 8674 with_ = 
query.args.get("with") 8675 ctes = with_.pop() if with_ else None 8676 8677 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8678 if ctes: 8679 new_select.set("with", ctes) 8680 8681 return new_select.with_(new_cte, as_=query, copy=False) 8682 8683 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8684 select = self._parse_select(consume_pipe=False) 8685 if not select: 8686 return query 8687 8688 return self._build_pipe_cte( 8689 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8690 ) 8691 8692 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8693 limit = self._parse_limit() 8694 offset = self._parse_offset() 8695 if limit: 8696 curr_limit = query.args.get("limit", limit) 8697 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8698 query.limit(limit, copy=False) 8699 if offset: 8700 curr_offset = query.args.get("offset") 8701 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8702 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8703 8704 return query 8705 8706 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8707 this = self._parse_assignment() 8708 if self._match_text_seq("GROUP", "AND", advance=False): 8709 return this 8710 8711 this = self._parse_alias(this) 8712 8713 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8714 return self._parse_ordered(lambda: this) 8715 8716 return this 8717 8718 def _parse_pipe_syntax_aggregate_group_order_by( 8719 self, query: exp.Select, group_by_exists: bool = True 8720 ) -> exp.Select: 8721 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8722 aggregates_or_groups, orders = [], [] 8723 for element in expr: 8724 if isinstance(element, exp.Ordered): 8725 this = element.this 8726 if isinstance(this, exp.Alias): 8727 element.set("this", this.args["alias"]) 8728 orders.append(element) 8729 else: 8730 this = element 8731 aggregates_or_groups.append(this) 8732 8733 if group_by_exists: 8734 query.select(*aggregates_or_groups, copy=False).group_by( 8735 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8736 copy=False, 8737 ) 8738 else: 8739 query.select(*aggregates_or_groups, append=False, copy=False) 8740 8741 if orders: 8742 return query.order_by(*orders, append=False, copy=False) 8743 8744 return query 8745 8746 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8747 self._match_text_seq("AGGREGATE") 8748 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8749 8750 if self._match(TokenType.GROUP_BY) or ( 8751 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8752 ): 8753 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8754 8755 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8756 8757 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8758 first_setop = self.parse_set_operation(this=query) 8759 if not first_setop: 8760 return None 8761 8762 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8763 expr = self._parse_paren() 8764 return expr.assert_is(exp.Subquery).unnest() if expr else None 8765 8766 first_setop.this.pop() 8767 8768 setops = [ 8769 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8770 *self._parse_csv(_parse_and_unwrap_query), 8771 ] 8772 8773 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8774 
with_ = query.args.get("with") 8775 ctes = with_.pop() if with_ else None 8776 8777 if isinstance(first_setop, exp.Union): 8778 query = query.union(*setops, copy=False, **first_setop.args) 8779 elif isinstance(first_setop, exp.Except): 8780 query = query.except_(*setops, copy=False, **first_setop.args) 8781 else: 8782 query = query.intersect(*setops, copy=False, **first_setop.args) 8783 8784 query.set("with", ctes) 8785 8786 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8787 8788 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8789 join = self._parse_join() 8790 if not join: 8791 return None 8792 8793 if isinstance(query, exp.Select): 8794 return query.join(join, copy=False) 8795 8796 return query 8797 8798 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8799 pivots = self._parse_pivots() 8800 if not pivots: 8801 return query 8802 8803 from_ = query.args.get("from") 8804 if from_: 8805 from_.this.set("pivots", pivots) 8806 8807 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8808 8809 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8810 self._match_text_seq("EXTEND") 8811 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8812 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8813 8814 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8815 sample = self._parse_table_sample() 8816 8817 with_ = query.args.get("with") 8818 if with_: 8819 with_.expressions[-1].this.set("sample", sample) 8820 else: 8821 query.set("sample", sample) 8822 8823 return query 8824 8825 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8826 if isinstance(query, exp.Subquery): 8827 query = exp.select("*").from_(query, copy=False) 8828 8829 if not query.args.get("from"): 8830 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8831 8832 while self._match(TokenType.PIPE_GT): 8833 start = self._curr 8834 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8835 if not parser: 8836 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8837 # keywords, making it tricky to disambiguate them without lookahead. The approach 8838 # here is to try and parse a set operation and if that fails, then try to parse a 8839 # join operator. If that fails as well, then the operator is not supported. 
8840 parsed_query = self._parse_pipe_syntax_set_operator(query) 8841 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8842 if not parsed_query: 8843 self._retreat(start) 8844 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8845 break 8846 query = parsed_query 8847 else: 8848 query = parser(self, query) 8849 8850 return query 8851 8852 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8853 vars = self._parse_csv(self._parse_id_var) 8854 if not vars: 8855 return None 8856 8857 return self.expression( 8858 exp.DeclareItem, 8859 this=vars, 8860 kind=self._parse_types(), 8861 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8862 ) 8863 8864 def _parse_declare(self) -> exp.Declare | exp.Command: 8865 start = self._prev 8866 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8867 8868 if not expressions or self._curr: 8869 return self._parse_as_command(start) 8870 8871 return self.expression(exp.Declare, expressions=expressions) 8872 8873 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8874 exp_class = exp.Cast if strict else exp.TryCast 8875 8876 if exp_class == exp.TryCast: 8877 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8878 8879 return self.expression(exp_class, **kwargs) 8880 8881 def _parse_json_value(self) -> exp.JSONValue: 8882 this = self._parse_bitwise() 8883 self._match(TokenType.COMMA) 8884 path = self._parse_bitwise() 8885 8886 returning = self._match(TokenType.RETURNING) and self._parse_type() 8887 8888 return self.expression( 8889 exp.JSONValue, 8890 this=this, 8891 path=self.dialect.to_json_path(path), 8892 returning=returning, 8893 on_condition=self._parse_on_condition(), 8894 ) 8895 8896 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8897 def concat_exprs( 8898 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8899 ) -> exp.Expression: 8900 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8901 concat_exprs = [ 8902 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8903 ] 8904 node.set("expressions", concat_exprs) 8905 return node 8906 if len(exprs) == 1: 8907 return exprs[0] 8908 return self.expression(exp.Concat, expressions=args, safe=True) 8909 8910 args = self._parse_csv(self._parse_lambda) 8911 8912 if args: 8913 order = args[-1] if isinstance(args[-1], exp.Order) else None 8914 8915 if order: 8916 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8917 # remove 'expr' from exp.Order and add it back to args 8918 args[-1] = order.this 8919 order.set("this", concat_exprs(order.this, args)) 8920 8921 this = order or concat_exprs(args[0], args) 8922 else: 8923 this = None 8924 8925 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8926 8927 return self.expression(exp.GroupConcat, this=this, separator=separator)
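The pipe-syntax helpers above (_parse_pipe_syntax_query, _build_pipe_cte and friends) rewrite each |> step into a chained CTE. A minimal sketch of exercising them through the public API, assuming a dialect that enables pipe syntax (e.g. BigQuery) and a sqlglot version that ships these helpers; the table and column names are hypothetical:

    import sqlglot

    # Each pipe operator becomes an intermediate __tmp CTE built by _build_pipe_cte.
    expr = sqlglot.parse_one(
        "FROM orders |> WHERE amount > 10 |> AGGREGATE SUM(amount) AS total GROUP BY customer_id",
        read="bigquery",
    )
    print(expr.sql())  # a regular WITH ... SELECT query equivalent to the pipeline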
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- dialect: The SQL dialect to parse with. Default: the base (generic) dialect.
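Most callers reach these settings indirectly through sqlglot.parse or sqlglot.parse_one, but the Parser can also be driven by hand. A minimal sketch, with a hypothetical SQL string:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)
    trees = parser.parse(Tokenizer().tokenize(sql), sql=sql)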
1589 def __init__( 1590 self, 1591 error_level: t.Optional[ErrorLevel] = None, 1592 error_message_context: int = 100, 1593 max_errors: int = 3, 1594 dialect: DialectType = None, 1595 ): 1596 from sqlglot.dialects import Dialect 1597 1598 self.error_level = error_level or ErrorLevel.IMMEDIATE 1599 self.error_message_context = error_message_context 1600 self.max_errors = max_errors 1601 self.dialect = Dialect.get_or_raise(dialect) 1602 self.reset()
1615 def parse( 1616 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1617 ) -> t.List[t.Optional[exp.Expression]]: 1618 """ 1619 Parses a list of tokens and returns a list of syntax trees, one tree 1620 per parsed SQL statement. 1621 1622 Args: 1623 raw_tokens: The list of tokens. 1624 sql: The original SQL string, used to produce helpful debug messages. 1625 1626 Returns: 1627 The list of the produced syntax trees. 1628 """ 1629 return self._parse( 1630 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1631 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
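A short sketch of the one-tree-per-statement contract (the statements are hypothetical):

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    # trees holds one syntax tree per statement: trees[0] for SELECT 1, trees[1] for SELECT 2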
1633 def parse_into( 1634 self, 1635 expression_types: exp.IntoType, 1636 raw_tokens: t.List[Token], 1637 sql: t.Optional[str] = None, 1638 ) -> t.List[t.Optional[exp.Expression]]: 1639 """ 1640 Parses a list of tokens into a given Expression type. If a collection of Expression 1641 types is given instead, this method will try to parse the token list into each one 1642 of them, stopping at the first for which the parsing succeeds. 1643 1644 Args: 1645 expression_types: The expression type(s) to try and parse the token list into. 1646 raw_tokens: The list of tokens. 1647 sql: The original SQL string, used to produce helpful debug messages. 1648 1649 Returns: 1650 The target Expression. 1651 """ 1652 errors = [] 1653 for expression_type in ensure_list(expression_types): 1654 parser = self.EXPRESSION_PARSERS.get(expression_type) 1655 if not parser: 1656 raise TypeError(f"No parser registered for {expression_type}") 1657 1658 try: 1659 return self._parse(parser, raw_tokens, sql) 1660 except ParseError as e: 1661 e.errors[0]["into_expression"] = expression_type 1662 errors.append(e) 1663 1664 raise ParseError( 1665 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1666 errors=merge_errors(errors), 1667 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
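A sketch of constraining the parse to a specific node type, e.g. forcing a dotted name to be read as a table reference rather than a column (the identifier is hypothetical):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "catalog.db.tbl"
    table = Parser().parse_into(exp.Table, Tokenizer().tokenize(sql), sql=sql)[0]
    # table is an exp.Table; a TypeError is raised if no parser is registered for the target type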
1707 def check_errors(self) -> None: 1708 """Logs or raises any found errors, depending on the chosen error level setting.""" 1709 if self.error_level == ErrorLevel.WARN: 1710 for error in self.errors: 1711 logger.error(str(error)) 1712 elif self.error_level == ErrorLevel.RAISE and self.errors: 1713 raise ParseError( 1714 concat_messages(self.errors, self.max_errors), 1715 errors=merge_errors(self.errors), 1716 )
Logs or raises any found errors, depending on the chosen error level setting.
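As a sketch, with ErrorLevel.WARN the errors collected during parsing are logged here rather than raised, and remain inspectable on the parser afterwards (the malformed statement is hypothetical):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT (1"  # missing closing parenthesis
    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize(sql), sql=sql)
    # check_errors() ran at the end of parse(): the problem was logged, not raised,
    # and the accumulated ParseError objects are still available on parser.errors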
1718 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1719 """ 1720 Appends an error in the list of recorded errors or raises it, depending on the chosen 1721 error level setting. 1722 """ 1723 token = token or self._curr or self._prev or Token.string("") 1724 start = token.start 1725 end = token.end + 1 1726 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1727 highlight = self.sql[start:end] 1728 end_context = self.sql[end : end + self.error_message_context] 1729 1730 error = ParseError.new( 1731 f"{message}. Line {token.line}, Col: {token.col}.\n" 1732 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1733 description=message, 1734 line=token.line, 1735 col=token.col, 1736 start_context=start_context, 1737 highlight=highlight, 1738 end_context=end_context, 1739 ) 1740 1741 if self.error_level == ErrorLevel.IMMEDIATE: 1742 raise error 1743 1744 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
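With the default ErrorLevel.IMMEDIATE the error is raised right away, carrying the structured fields assembled above. A sketch, again with a hypothetical broken statement:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT (1"
    try:
        Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    except ParseError as e:
        info = e.errors[0]
        print(info["description"], info["line"], info["col"], info["highlight"])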
1746 def expression( 1747 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1748 ) -> E: 1749 """ 1750 Creates a new, validated Expression. 1751 1752 Args: 1753 exp_class: The expression class to instantiate. 1754 comments: An optional list of comments to attach to the expression. 1755 kwargs: The arguments to set for the expression along with their respective values. 1756 1757 Returns: 1758 The target expression. 1759 """ 1760 instance = exp_class(**kwargs) 1761 instance.add_comments(comments) if comments else self._add_comments(instance) 1762 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
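A sketch of building a node the way parser methods do internally, assuming default parser settings:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    # Creates CAST(a AS INT) and validates its mandatory arguments in one step.
    node = parser.expression(exp.Cast, this=exp.column("a"), to=exp.DataType.build("int"))
    print(node.sql())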
1769 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1770 """ 1771 Validates an Expression, making sure that all its mandatory arguments are set. 1772 1773 Args: 1774 expression: The expression to validate. 1775 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1776 1777 Returns: 1778 The validated expression. 1779 """ 1780 if self.error_level != ErrorLevel.IGNORE: 1781 for error_message in expression.error_messages(args): 1782 self.raise_error(error_message) 1783 1784 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
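A sketch of the failure mode: validating a node that is missing a mandatory argument reports an error, which under the default ErrorLevel.IMMEDIATE is raised as a ParseError:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    incomplete = exp.Cast(this=exp.column("a"))  # the mandatory "to" argument is missing
    parser.validate_expression(incomplete)  # raises a ParseError naming the missing keyword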
4874 def parse_set_operation( 4875 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4876 ) -> t.Optional[exp.Expression]: 4877 start = self._index 4878 _, side_token, kind_token = self._parse_join_parts() 4879 4880 side = side_token.text if side_token else None 4881 kind = kind_token.text if kind_token else None 4882 4883 if not self._match_set(self.SET_OPERATIONS): 4884 self._retreat(start) 4885 return None 4886 4887 token_type = self._prev.token_type 4888 4889 if token_type == TokenType.UNION: 4890 operation: t.Type[exp.SetOperation] = exp.Union 4891 elif token_type == TokenType.EXCEPT: 4892 operation = exp.Except 4893 else: 4894 operation = exp.Intersect 4895 4896 comments = self._prev.comments 4897 4898 if self._match(TokenType.DISTINCT): 4899 distinct: t.Optional[bool] = True 4900 elif self._match(TokenType.ALL): 4901 distinct = False 4902 else: 4903 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4904 if distinct is None: 4905 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4906 4907 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4908 "STRICT", "CORRESPONDING" 4909 ) 4910 if self._match_text_seq("CORRESPONDING"): 4911 by_name = True 4912 if not side and not kind: 4913 kind = "INNER" 4914 4915 on_column_list = None 4916 if by_name and self._match_texts(("ON", "BY")): 4917 on_column_list = self._parse_wrapped_csv(self._parse_column) 4918 4919 expression = self._parse_select( 4920 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4921 ) 4922 4923 return self.expression( 4924 operation, 4925 comments=comments, 4926 this=this, 4927 distinct=distinct, 4928 by_name=by_name, 4929 expression=expression, 4930 side=side, 4931 kind=kind, 4932 on=on_column_list, 4933 )
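A sketch of input that exercises the BY NAME branch above, assuming a dialect that accepts it (e.g. DuckDB); the table and column names are hypothetical:

    import sqlglot

    # UNION BY NAME matches columns by name instead of position; parse_set_operation
    # records this on the resulting exp.Union via by_name=True.
    expr = sqlglot.parse_one("SELECT a, b FROM x UNION BY NAME SELECT b, a FROM y", read="duckdb")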